{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use PPO to Play Acrobot-v1\n",
    "\n",
    "TensorFlow version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import scipy.signal as signal\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import keras\n",
    "from tensorflow import nn\n",
    "from tensorflow import optimizers\n",
    "from tensorflow import losses\n",
    "from tensorflow.keras import layers\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "03:33:25 [INFO] env: <AcrobotEnv<Acrobot-v1>>\n",
      "03:33:25 [INFO] action_space: Discrete(3)\n",
      "03:33:25 [INFO] observation_space: Box(-28.274333953857422, 28.274333953857422, (6,), float32)\n",
      "03:33:25 [INFO] reward_range: (-inf, inf)\n",
      "03:33:25 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 15}\n",
      "03:33:25 [INFO] _max_episode_steps: 500\n",
      "03:33:25 [INFO] _elapsed_steps: None\n",
      "03:33:25 [INFO] id: Acrobot-v1\n",
      "03:33:25 [INFO] entry_point: gym.envs.classic_control:AcrobotEnv\n",
      "03:33:25 [INFO] reward_threshold: -100.0\n",
      "03:33:25 [INFO] nondeterministic: False\n",
      "03:33:25 [INFO] max_episode_steps: 500\n",
      "03:33:25 [INFO] _kwargs: {}\n",
      "03:33:25 [INFO] _env_name: Acrobot\n"
     ]
    }
   ],
   "source": [
    "# Create the Acrobot-v1 environment and seed it.\n",
    "env = gym.make('Acrobot-v1')\n",
    "env.seed(0)\n",
    "\n",
    "# Log every instance attribute of the environment first, then of its spec.\n",
    "for described in (env, env.spec):\n",
    "    for key, value in vars(described).items():\n",
    "        logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class PPOReplayer:\n",
    "    def __init__(self):\n",
    "        self.fields = ['state', 'action', 'prob', 'advantage', 'return']\n",
    "        self.memory = pd.DataFrame(columns=self.fields)\n",
    "\n",
    "    def store(self, df):\n",
    "        self.memory = pd.concat([self.memory, df[self.fields]], ignore_index=True)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.memory.shape[0], size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class PPOAgent:\n",
    "    '''PPO agent with a softmax actor and a scalar critic.\n",
    "\n",
    "    ``reset`` must be called before ``step`` or ``close`` because both read\n",
    "    ``self.mode``. In 'train' mode every ``step`` is recorded, and ``close``\n",
    "    moves the episode into the replayer and, once enough rows are stored,\n",
    "    runs a few updates and then starts a fresh replayer.\n",
    "    '''\n",
    "\n",
    "    def __init__(self, env, gamma=0.99, clip_ratio=0.1, batch_size=64,\n",
    "                learn_times=5, min_replay_size=1000):\n",
    "        '''Create the networks and the replayer.\n",
    "\n",
    "        env: environment whose ``action_space.n`` gives the action count.\n",
    "        gamma: discount factor used for advantages and returns.\n",
    "        clip_ratio: half-width of the PPO ratio clipping interval.\n",
    "        batch_size: number of rows sampled for each ``learn`` call.\n",
    "        learn_times: number of ``learn`` calls per update round.\n",
    "        min_replay_size: stored rows required before an update round runs.\n",
    "        '''\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = gamma\n",
    "        self.clip_ratio = clip_ratio\n",
    "        self.batch_size = batch_size\n",
    "        self.learn_times = learn_times\n",
    "        self.min_replay_size = min_replay_size\n",
    "\n",
    "        self.replayer = PPOReplayer()\n",
    "\n",
    "        self.actor_net = self.build_net(hidden_sizes=[100,],\n",
    "                output_size=self.action_n, output_activation=nn.softmax,\n",
    "                learning_rate=0.001)\n",
    "        self.critic_net = self.build_net(hidden_sizes=[100,],\n",
    "                learning_rate=0.002)\n",
    "\n",
    "    def build_net(self, input_size=None, hidden_sizes=None, output_size=1,\n",
    "                activation=nn.relu, output_activation=None,\n",
    "                loss=losses.mse, learning_rate=0.001):\n",
    "        '''Build and compile a fully connected Keras model.\n",
    "\n",
    "        ``hidden_sizes`` lists the hidden layer widths; None means no hidden\n",
    "        layer. ``input_size`` is kept in the signature but is not used.\n",
    "        Returns the compiled model with an Adam optimizer.\n",
    "        '''\n",
    "        model = keras.Sequential()\n",
    "        # Treat the None default as an empty list instead of failing on it.\n",
    "        for hidden_size in (hidden_sizes or []):\n",
    "            model.add(layers.Dense(units=hidden_size,\n",
    "                    activation=activation))\n",
    "        model.add(layers.Dense(units=output_size,\n",
    "                activation=output_activation))\n",
    "        optimizer = optimizers.Adam(learning_rate)\n",
    "        model.compile(optimizer=optimizer, loss=loss)\n",
    "        return model\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        '''Start a new episode; a fresh trajectory is opened in 'train' mode.'''\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        '''Sample an action from the actor's distribution for ``observation``.\n",
    "\n",
    "        In 'train' mode the observation, reward, done flag and chosen action\n",
    "        are appended to the trajectory. Returns the chosen action index.\n",
    "        '''\n",
    "        # verbose=0 keeps Keras from printing a progress bar on every step.\n",
    "        probs = self.actor_net.predict(observation[np.newaxis], verbose=0)[0]\n",
    "        action = np.random.choice(self.action_n, p=probs)\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        '''Finish the episode; in 'train' mode store it and maybe learn.'''\n",
    "        if self.mode == 'train':\n",
    "            self.save_trajectory_to_replayer()\n",
    "            if len(self.replayer.memory) >= self.min_replay_size:\n",
    "                for _ in range(self.learn_times): # learn multiple times\n",
    "                    self.learn()\n",
    "                # reset replayer after the agent changes itself: the stored\n",
    "                # probabilities belong to the policy before the update\n",
    "                self.replayer = PPOReplayer()\n",
    "\n",
    "    def save_trajectory_to_replayer(self):\n",
    "        '''Turn the recorded trajectory into training rows and store them.\n",
    "\n",
    "        Adds the critic value ``v``, the actor probability ``prob`` of the\n",
    "        taken action, the one-step target ``u`` and TD error ``delta``, and\n",
    "        the gamma-discounted suffix sums ``advantage`` (of ``delta``) and\n",
    "        ``return`` (of ``reward``). Does nothing for an empty trajectory.\n",
    "        '''\n",
    "        if not self.trajectory:\n",
    "            return\n",
    "        # The trajectory is a flat list with four entries per step.\n",
    "        df = pd.DataFrame(\n",
    "                np.array(self.trajectory, dtype=object).reshape(-1, 4),\n",
    "                columns=['state', 'reward', 'done', 'action'], dtype=object)\n",
    "        states = np.stack(df['state'])\n",
    "        df['v'] = self.critic_net.predict(states, verbose=0)\n",
    "        pis = self.actor_net.predict(states, verbose=0)\n",
    "        df['prob'] = [pi[action] for pi, action in zip(pis, df['action'])]\n",
    "        # The last row has no successor, so its next value is taken as 0.\n",
    "        df['next_v'] = df['v'].shift(-1).fillna(0.)\n",
    "        df['u'] = df['reward'] + self.gamma * df['next_v']\n",
    "        df['delta'] = df['u'] - df['v']\n",
    "        # Filtering the reversed series and reversing back gives, for each\n",
    "        # row, the sum of the later entries discounted by gamma per step.\n",
    "        df['advantage'] = signal.lfilter([1.,], [1., -self.gamma],\n",
    "                df['delta'][::-1])[::-1]\n",
    "        df['return'] = signal.lfilter([1.,], [1., -self.gamma],\n",
    "                df['reward'][::-1])[::-1]\n",
    "        self.replayer.store(df)\n",
    "\n",
    "    def learn(self):\n",
    "        '''Run one actor update and one critic fit on a sampled batch.'''\n",
    "        states, actions, old_pis, advantages, returns = self.replayer.sample(\n",
    "                size=self.batch_size)\n",
    "        state_tensor = tf.convert_to_tensor(states, dtype=tf.float32)\n",
    "        action_tensor = tf.convert_to_tensor(actions, dtype=tf.int32)\n",
    "        old_pi_tensor = tf.convert_to_tensor(old_pis, dtype=tf.float32)\n",
    "        advantage_tensor = tf.convert_to_tensor(advantages, dtype=tf.float32)\n",
    "\n",
    "        # train actor\n",
    "        with tf.GradientTape() as tape:\n",
    "            all_pi_tensor = self.actor_net(state_tensor)\n",
    "            pi_tensor = tf.gather(all_pi_tensor, action_tensor, batch_dims=1)\n",
    "            ratio_tensor = pi_tensor / old_pi_tensor\n",
    "            surrogate_advantage_tensor = ratio_tensor * advantage_tensor\n",
    "            # PPO clipped objective: min(r * A, clip(r, 1 - eps, 1 + eps) * A).\n",
    "            # The bound is built from the clipped ratio, so it is constant in\n",
    "            # the policy once the ratio leaves the interval and contributes no\n",
    "            # gradient there.\n",
    "            clipped_ratio_tensor = tf.clip_by_value(ratio_tensor,\n",
    "                    1. - self.clip_ratio, 1. + self.clip_ratio)\n",
    "            clipped_surrogate_advantage_tensor = tf.minimum(\n",
    "                    surrogate_advantage_tensor,\n",
    "                    clipped_ratio_tensor * advantage_tensor)\n",
    "            loss_tensor = -tf.reduce_mean(clipped_surrogate_advantage_tensor)\n",
    "        actor_variables = self.actor_net.trainable_variables\n",
    "        actor_grads = tape.gradient(loss_tensor, actor_variables)\n",
    "        self.actor_net.optimizer.apply_gradients(\n",
    "                zip(actor_grads, actor_variables))\n",
    "\n",
    "        # train critic\n",
    "        self.critic_net.fit(states, returns, verbose=0)\n",
    "\n",
    "\n",
    "agent = PPOAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "03:33:26 [INFO] ==== train ====\n",
      "03:34:14 [DEBUG] train episode 0: reward = -500.00, steps = 500\n",
      "03:34:14 [INFO] NumExpr defaulting to 8 threads.\n",
      "03:35:03 [DEBUG] train episode 1: reward = -500.00, steps = 500\n",
      "03:35:52 [DEBUG] train episode 2: reward = -500.00, steps = 500\n",
      "03:36:40 [DEBUG] train episode 3: reward = -500.00, steps = 500\n",
      "03:37:27 [DEBUG] train episode 4: reward = -485.00, steps = 486\n",
      "03:38:14 [DEBUG] train episode 5: reward = -500.00, steps = 500\n",
      "03:38:45 [DEBUG] train episode 6: reward = -339.00, steps = 340\n",
      "03:39:27 [DEBUG] train episode 7: reward = -448.00, steps = 449\n",
      "03:39:54 [DEBUG] train episode 8: reward = -292.00, steps = 293\n",
      "03:40:29 [DEBUG] train episode 9: reward = -383.00, steps = 384\n",
      "03:41:16 [DEBUG] train episode 10: reward = -500.00, steps = 500\n",
      "03:41:42 [DEBUG] train episode 11: reward = -283.00, steps = 284\n",
      "03:42:05 [DEBUG] train episode 12: reward = -254.00, steps = 255\n",
      "03:42:30 [DEBUG] train episode 13: reward = -267.00, steps = 268\n",
      "03:43:16 [DEBUG] train episode 14: reward = -500.00, steps = 500\n",
      "03:44:01 [DEBUG] train episode 15: reward = -500.00, steps = 500\n",
      "03:44:47 [DEBUG] train episode 16: reward = -500.00, steps = 500\n",
      "03:45:31 [DEBUG] train episode 17: reward = -500.00, steps = 500\n",
      "03:46:17 [DEBUG] train episode 18: reward = -500.00, steps = 500\n",
      "03:47:03 [DEBUG] train episode 19: reward = -500.00, steps = 500\n",
      "03:47:50 [DEBUG] train episode 20: reward = -500.00, steps = 500\n",
      "03:48:36 [DEBUG] train episode 21: reward = -500.00, steps = 500\n",
      "03:49:23 [DEBUG] train episode 22: reward = -500.00, steps = 500\n",
      "03:50:09 [DEBUG] train episode 23: reward = -500.00, steps = 500\n",
      "03:50:45 [DEBUG] train episode 24: reward = -390.00, steps = 391\n",
      "03:51:31 [DEBUG] train episode 25: reward = -500.00, steps = 500\n",
      "03:52:15 [DEBUG] train episode 26: reward = -487.00, steps = 488\n",
      "03:53:02 [DEBUG] train episode 27: reward = -500.00, steps = 500\n",
      "03:53:48 [DEBUG] train episode 28: reward = -500.00, steps = 500\n",
      "03:54:33 [DEBUG] train episode 29: reward = -500.00, steps = 500\n",
      "03:55:18 [DEBUG] train episode 30: reward = -500.00, steps = 500\n",
      "03:56:04 [DEBUG] train episode 31: reward = -500.00, steps = 500\n",
      "03:56:40 [DEBUG] train episode 32: reward = -398.00, steps = 399\n",
      "03:57:16 [DEBUG] train episode 33: reward = -392.00, steps = 393\n",
      "03:58:01 [DEBUG] train episode 34: reward = -500.00, steps = 500\n",
      "03:58:46 [DEBUG] train episode 35: reward = -500.00, steps = 500\n",
      "03:59:32 [DEBUG] train episode 36: reward = -492.00, steps = 493\n",
      "04:00:17 [DEBUG] train episode 37: reward = -500.00, steps = 500\n",
      "04:01:03 [DEBUG] train episode 38: reward = -500.00, steps = 500\n",
      "04:01:49 [DEBUG] train episode 39: reward = -500.00, steps = 500\n",
      "04:02:33 [DEBUG] train episode 40: reward = -500.00, steps = 500\n",
      "04:03:01 [DEBUG] train episode 41: reward = -302.00, steps = 303\n",
      "04:03:26 [DEBUG] train episode 42: reward = -266.00, steps = 267\n",
      "04:03:59 [DEBUG] train episode 43: reward = -357.00, steps = 358\n",
      "04:04:37 [DEBUG] train episode 44: reward = -416.00, steps = 417\n",
      "04:05:17 [DEBUG] train episode 45: reward = -423.00, steps = 424\n",
      "04:06:03 [DEBUG] train episode 46: reward = -500.00, steps = 500\n",
      "04:06:48 [DEBUG] train episode 47: reward = -500.00, steps = 500\n",
      "04:07:35 [DEBUG] train episode 48: reward = -500.00, steps = 500\n",
      "04:08:20 [DEBUG] train episode 49: reward = -500.00, steps = 500\n",
      "04:09:07 [DEBUG] train episode 50: reward = -500.00, steps = 500\n",
      "04:09:53 [DEBUG] train episode 51: reward = -500.00, steps = 500\n",
      "04:10:40 [DEBUG] train episode 52: reward = -500.00, steps = 500\n",
      "04:11:26 [DEBUG] train episode 53: reward = -500.00, steps = 500\n",
      "04:12:13 [DEBUG] train episode 54: reward = -500.00, steps = 500\n",
      "04:13:00 [DEBUG] train episode 55: reward = -500.00, steps = 500\n",
      "04:13:47 [DEBUG] train episode 56: reward = -500.00, steps = 500\n",
      "04:14:33 [DEBUG] train episode 57: reward = -500.00, steps = 500\n",
      "04:15:20 [DEBUG] train episode 58: reward = -500.00, steps = 500\n",
      "04:16:05 [DEBUG] train episode 59: reward = -500.00, steps = 500\n",
      "04:16:51 [DEBUG] train episode 60: reward = -500.00, steps = 500\n",
      "04:17:37 [DEBUG] train episode 61: reward = -500.00, steps = 500\n",
      "04:18:24 [DEBUG] train episode 62: reward = -500.00, steps = 500\n",
      "04:19:10 [DEBUG] train episode 63: reward = -500.00, steps = 500\n",
      "04:19:55 [DEBUG] train episode 64: reward = -500.00, steps = 500\n",
      "04:20:41 [DEBUG] train episode 65: reward = -500.00, steps = 500\n",
      "04:21:27 [DEBUG] train episode 66: reward = -500.00, steps = 500\n",
      "04:22:13 [DEBUG] train episode 67: reward = -500.00, steps = 500\n",
      "04:23:00 [DEBUG] train episode 68: reward = -500.00, steps = 500\n",
      "04:23:45 [DEBUG] train episode 69: reward = -500.00, steps = 500\n",
      "04:24:31 [DEBUG] train episode 70: reward = -500.00, steps = 500\n",
      "04:25:15 [DEBUG] train episode 71: reward = -479.00, steps = 480\n",
      "04:26:00 [DEBUG] train episode 72: reward = -500.00, steps = 500\n",
      "04:26:46 [DEBUG] train episode 73: reward = -500.00, steps = 500\n",
      "04:27:32 [DEBUG] train episode 74: reward = -500.00, steps = 500\n",
      "04:28:18 [DEBUG] train episode 75: reward = -500.00, steps = 500\n",
      "04:29:04 [DEBUG] train episode 76: reward = -500.00, steps = 500\n",
      "04:29:51 [DEBUG] train episode 77: reward = -500.00, steps = 500\n",
      "04:30:37 [DEBUG] train episode 78: reward = -500.00, steps = 500\n",
      "04:31:22 [DEBUG] train episode 79: reward = -500.00, steps = 500\n",
      "04:32:08 [DEBUG] train episode 80: reward = -500.00, steps = 500\n",
      "04:32:55 [DEBUG] train episode 81: reward = -500.00, steps = 500\n",
      "04:33:41 [DEBUG] train episode 82: reward = -500.00, steps = 500\n",
      "04:34:27 [DEBUG] train episode 83: reward = -500.00, steps = 500\n",
      "04:35:14 [DEBUG] train episode 84: reward = -500.00, steps = 500\n",
      "04:36:00 [DEBUG] train episode 85: reward = -500.00, steps = 500\n",
      "04:36:47 [DEBUG] train episode 86: reward = -500.00, steps = 500\n",
      "04:37:33 [DEBUG] train episode 87: reward = -500.00, steps = 500\n",
      "04:38:24 [DEBUG] train episode 88: reward = -500.00, steps = 500\n",
      "04:39:09 [DEBUG] train episode 89: reward = -474.00, steps = 475\n",
      "04:39:55 [DEBUG] train episode 90: reward = -500.00, steps = 500\n",
      "04:40:39 [DEBUG] train episode 91: reward = -480.00, steps = 481\n",
      "04:41:27 [DEBUG] train episode 92: reward = -500.00, steps = 500\n",
      "04:42:13 [DEBUG] train episode 93: reward = -500.00, steps = 500\n",
      "04:43:00 [DEBUG] train episode 94: reward = -500.00, steps = 500\n",
      "04:43:46 [DEBUG] train episode 95: reward = -500.00, steps = 500\n",
      "04:44:33 [DEBUG] train episode 96: reward = -500.00, steps = 500\n",
      "04:45:19 [DEBUG] train episode 97: reward = -500.00, steps = 500\n",
      "04:45:55 [DEBUG] train episode 98: reward = -382.00, steps = 383\n",
      "04:46:41 [DEBUG] train episode 99: reward = -500.00, steps = 500\n",
      "04:47:27 [DEBUG] train episode 100: reward = -500.00, steps = 500\n",
      "04:48:14 [DEBUG] train episode 101: reward = -500.00, steps = 500\n",
      "04:48:42 [DEBUG] train episode 102: reward = -298.00, steps = 299\n",
      "04:49:25 [DEBUG] train episode 103: reward = -467.00, steps = 468\n",
      "04:50:13 [DEBUG] train episode 104: reward = -500.00, steps = 500\n",
      "04:51:03 [DEBUG] train episode 105: reward = -500.00, steps = 500\n",
      "04:51:50 [DEBUG] train episode 106: reward = -500.00, steps = 500\n",
      "04:52:36 [DEBUG] train episode 107: reward = -500.00, steps = 500\n",
      "04:53:23 [DEBUG] train episode 108: reward = -500.00, steps = 500\n",
      "04:54:09 [DEBUG] train episode 109: reward = -500.00, steps = 500\n",
      "04:54:47 [DEBUG] train episode 110: reward = -393.00, steps = 394\n",
      "04:55:33 [DEBUG] train episode 111: reward = -500.00, steps = 500\n",
      "04:56:17 [DEBUG] train episode 112: reward = -500.00, steps = 500\n",
      "04:57:03 [DEBUG] train episode 113: reward = -500.00, steps = 500\n",
      "04:57:49 [DEBUG] train episode 114: reward = -500.00, steps = 500\n",
      "04:58:36 [DEBUG] train episode 115: reward = -500.00, steps = 500\n",
      "04:59:22 [DEBUG] train episode 116: reward = -500.00, steps = 500\n",
      "05:00:07 [DEBUG] train episode 117: reward = -500.00, steps = 500\n",
      "05:00:53 [DEBUG] train episode 118: reward = -500.00, steps = 500\n",
      "05:01:38 [DEBUG] train episode 119: reward = -500.00, steps = 500\n",
      "05:02:17 [DEBUG] train episode 120: reward = -417.00, steps = 418\n",
      "05:03:02 [DEBUG] train episode 121: reward = -500.00, steps = 500\n",
      "05:03:47 [DEBUG] train episode 122: reward = -500.00, steps = 500\n",
      "05:04:31 [DEBUG] train episode 123: reward = -493.00, steps = 494\n",
      "05:05:14 [DEBUG] train episode 124: reward = -480.00, steps = 481\n",
      "05:05:38 [DEBUG] train episode 125: reward = -254.00, steps = 255\n",
      "05:06:11 [DEBUG] train episode 126: reward = -369.00, steps = 370\n",
      "05:06:44 [DEBUG] train episode 127: reward = -371.00, steps = 372\n",
      "05:07:30 [DEBUG] train episode 128: reward = -496.00, steps = 497\n",
      "05:07:58 [DEBUG] train episode 129: reward = -316.00, steps = 317\n",
      "05:08:28 [DEBUG] train episode 130: reward = -324.00, steps = 325\n",
      "05:09:03 [DEBUG] train episode 131: reward = -388.00, steps = 389\n",
      "05:09:41 [DEBUG] train episode 132: reward = -418.00, steps = 419\n",
      "05:10:14 [DEBUG] train episode 133: reward = -359.00, steps = 360\n",
      "05:10:59 [DEBUG] train episode 134: reward = -500.00, steps = 500\n",
      "05:11:47 [DEBUG] train episode 135: reward = -500.00, steps = 500\n",
      "05:12:21 [DEBUG] train episode 136: reward = -365.00, steps = 366\n",
      "05:12:43 [DEBUG] train episode 137: reward = -238.00, steps = 239\n",
      "05:13:13 [DEBUG] train episode 138: reward = -332.00, steps = 333\n",
      "05:13:58 [DEBUG] train episode 139: reward = -500.00, steps = 500\n",
      "05:14:35 [DEBUG] train episode 140: reward = -401.00, steps = 402\n",
      "05:15:02 [DEBUG] train episode 141: reward = -305.00, steps = 306\n",
      "05:15:22 [DEBUG] train episode 142: reward = -225.00, steps = 226\n",
      "05:15:41 [DEBUG] train episode 143: reward = -227.00, steps = 228\n",
      "05:16:08 [DEBUG] train episode 144: reward = -300.00, steps = 301\n",
      "05:16:35 [DEBUG] train episode 145: reward = -308.00, steps = 309\n",
      "05:17:07 [DEBUG] train episode 146: reward = -365.00, steps = 366\n",
      "05:17:31 [DEBUG] train episode 147: reward = -267.00, steps = 268\n",
      "05:18:00 [DEBUG] train episode 148: reward = -319.00, steps = 320\n",
      "05:18:27 [DEBUG] train episode 149: reward = -312.00, steps = 313\n",
      "05:18:40 [DEBUG] train episode 150: reward = -150.00, steps = 151\n",
      "05:19:22 [DEBUG] train episode 151: reward = -472.00, steps = 473\n",
      "05:19:43 [DEBUG] train episode 152: reward = -227.00, steps = 228\n",
      "05:20:09 [DEBUG] train episode 153: reward = -298.00, steps = 299\n",
      "05:20:27 [DEBUG] train episode 154: reward = -211.00, steps = 212\n",
      "05:20:49 [DEBUG] train episode 155: reward = -244.00, steps = 245\n",
      "05:21:09 [DEBUG] train episode 156: reward = -238.00, steps = 239\n",
      "05:21:32 [DEBUG] train episode 157: reward = -255.00, steps = 256\n",
      "05:21:51 [DEBUG] train episode 158: reward = -220.00, steps = 221\n",
      "05:22:21 [DEBUG] train episode 159: reward = -343.00, steps = 344\n",
      "05:22:35 [DEBUG] train episode 160: reward = -161.00, steps = 162\n",
      "05:22:54 [DEBUG] train episode 161: reward = -210.00, steps = 211\n",
      "05:23:20 [DEBUG] train episode 162: reward = -296.00, steps = 297\n",
      "05:23:38 [DEBUG] train episode 163: reward = -196.00, steps = 197\n",
      "05:23:58 [DEBUG] train episode 164: reward = -235.00, steps = 236\n",
      "05:24:24 [DEBUG] train episode 165: reward = -292.00, steps = 293\n",
      "05:24:41 [DEBUG] train episode 166: reward = -191.00, steps = 192\n",
      "05:25:18 [DEBUG] train episode 167: reward = -415.00, steps = 416\n",
      "05:25:36 [DEBUG] train episode 168: reward = -209.00, steps = 210\n",
      "05:26:05 [DEBUG] train episode 169: reward = -330.00, steps = 331\n",
      "05:26:21 [DEBUG] train episode 170: reward = -173.00, steps = 174\n",
      "05:26:37 [DEBUG] train episode 171: reward = -189.00, steps = 190\n",
      "05:26:50 [DEBUG] train episode 172: reward = -142.00, steps = 143\n",
      "05:27:10 [DEBUG] train episode 173: reward = -234.00, steps = 235\n",
      "05:27:28 [DEBUG] train episode 174: reward = -197.00, steps = 198\n",
      "05:27:43 [DEBUG] train episode 175: reward = -180.00, steps = 181\n",
      "05:28:09 [DEBUG] train episode 176: reward = -284.00, steps = 285\n",
      "05:28:36 [DEBUG] train episode 177: reward = -314.00, steps = 315\n",
      "05:28:53 [DEBUG] train episode 178: reward = -190.00, steps = 191\n",
      "05:29:11 [DEBUG] train episode 179: reward = -209.00, steps = 210\n",
      "05:29:29 [DEBUG] train episode 180: reward = -202.00, steps = 203\n",
      "05:29:41 [DEBUG] train episode 181: reward = -136.00, steps = 137\n",
      "05:29:54 [DEBUG] train episode 182: reward = -142.00, steps = 143\n",
      "05:30:16 [DEBUG] train episode 183: reward = -261.00, steps = 262\n",
      "05:30:34 [DEBUG] train episode 184: reward = -195.00, steps = 196\n",
      "05:30:49 [DEBUG] train episode 185: reward = -170.00, steps = 171\n",
      "05:31:04 [DEBUG] train episode 186: reward = -161.00, steps = 162\n",
      "05:31:21 [DEBUG] train episode 187: reward = -200.00, steps = 201\n",
      "05:31:33 [DEBUG] train episode 188: reward = -130.00, steps = 131\n",
      "05:31:45 [DEBUG] train episode 189: reward = -133.00, steps = 134\n",
      "05:32:09 [DEBUG] train episode 190: reward = -275.00, steps = 276\n",
      "05:32:25 [DEBUG] train episode 191: reward = -183.00, steps = 184\n",
      "05:32:44 [DEBUG] train episode 192: reward = -204.00, steps = 205\n",
      "05:33:02 [DEBUG] train episode 193: reward = -204.00, steps = 205\n",
      "05:33:24 [DEBUG] train episode 194: reward = -255.00, steps = 256\n",
      "05:33:38 [DEBUG] train episode 195: reward = -154.00, steps = 155\n",
      "05:33:55 [DEBUG] train episode 196: reward = -189.00, steps = 190\n",
      "05:34:09 [DEBUG] train episode 197: reward = -156.00, steps = 157\n",
      "05:34:21 [DEBUG] train episode 198: reward = -135.00, steps = 136\n",
      "05:34:32 [DEBUG] train episode 199: reward = -118.00, steps = 119\n",
      "05:34:50 [DEBUG] train episode 200: reward = -196.00, steps = 197\n",
      "05:35:06 [DEBUG] train episode 201: reward = -175.00, steps = 176\n",
      "05:35:23 [DEBUG] train episode 202: reward = -202.00, steps = 203\n",
      "05:35:36 [DEBUG] train episode 203: reward = -144.00, steps = 145\n",
      "05:35:52 [DEBUG] train episode 204: reward = -172.00, steps = 173\n",
      "05:36:05 [DEBUG] train episode 205: reward = -135.00, steps = 136\n",
      "05:36:17 [DEBUG] train episode 206: reward = -145.00, steps = 146\n",
      "05:36:30 [DEBUG] train episode 207: reward = -147.00, steps = 148\n",
      "05:36:42 [DEBUG] train episode 208: reward = -122.00, steps = 123\n",
      "05:36:58 [DEBUG] train episode 209: reward = -176.00, steps = 177\n",
      "05:37:10 [DEBUG] train episode 210: reward = -136.00, steps = 137\n",
      "05:37:23 [DEBUG] train episode 211: reward = -134.00, steps = 135\n",
      "05:37:36 [DEBUG] train episode 212: reward = -140.00, steps = 141\n",
      "05:37:50 [DEBUG] train episode 213: reward = -144.00, steps = 145\n",
      "05:38:11 [DEBUG] train episode 214: reward = -238.00, steps = 239\n",
      "05:38:31 [DEBUG] train episode 215: reward = -218.00, steps = 219\n",
      "05:38:50 [DEBUG] train episode 216: reward = -211.00, steps = 212\n",
      "05:39:05 [DEBUG] train episode 217: reward = -155.00, steps = 156\n",
      "05:39:14 [DEBUG] train episode 218: reward = -112.00, steps = 113\n",
      "05:39:27 [DEBUG] train episode 219: reward = -144.00, steps = 145\n",
      "05:39:59 [DEBUG] train episode 220: reward = -364.00, steps = 365\n",
      "05:40:13 [DEBUG] train episode 221: reward = -165.00, steps = 166\n",
      "05:40:27 [DEBUG] train episode 222: reward = -153.00, steps = 154\n",
      "05:40:38 [DEBUG] train episode 223: reward = -124.00, steps = 125\n",
      "05:40:53 [DEBUG] train episode 224: reward = -169.00, steps = 170\n",
      "05:41:03 [DEBUG] train episode 225: reward = -110.00, steps = 111\n",
      "05:41:19 [DEBUG] train episode 226: reward = -179.00, steps = 180\n",
      "05:41:34 [DEBUG] train episode 227: reward = -164.00, steps = 165\n",
      "05:41:43 [DEBUG] train episode 228: reward = -103.00, steps = 104\n",
      "05:41:53 [DEBUG] train episode 229: reward = -109.00, steps = 110\n",
      "05:42:07 [DEBUG] train episode 230: reward = -160.00, steps = 161\n",
      "05:42:25 [DEBUG] train episode 231: reward = -198.00, steps = 199\n",
      "05:42:36 [DEBUG] train episode 232: reward = -114.00, steps = 115\n",
      "05:42:53 [DEBUG] train episode 233: reward = -189.00, steps = 190\n",
      "05:43:05 [DEBUG] train episode 234: reward = -131.00, steps = 132\n",
      "05:43:18 [DEBUG] train episode 235: reward = -154.00, steps = 155\n",
      "05:43:33 [DEBUG] train episode 236: reward = -161.00, steps = 162\n",
      "05:43:46 [DEBUG] train episode 237: reward = -139.00, steps = 140\n",
      "05:43:59 [DEBUG] train episode 238: reward = -138.00, steps = 139\n",
      "05:44:13 [DEBUG] train episode 239: reward = -164.00, steps = 165\n",
      "05:44:26 [DEBUG] train episode 240: reward = -141.00, steps = 142\n",
      "05:44:42 [DEBUG] train episode 241: reward = -182.00, steps = 183\n",
      "05:44:56 [DEBUG] train episode 242: reward = -161.00, steps = 162\n",
      "05:45:07 [DEBUG] train episode 243: reward = -120.00, steps = 121\n",
      "05:45:17 [DEBUG] train episode 244: reward = -113.00, steps = 114\n",
      "05:45:29 [DEBUG] train episode 245: reward = -129.00, steps = 130\n",
      "05:45:41 [DEBUG] train episode 246: reward = -142.00, steps = 143\n",
      "05:45:53 [DEBUG] train episode 247: reward = -135.00, steps = 136\n",
      "05:46:04 [DEBUG] train episode 248: reward = -120.00, steps = 121\n",
      "05:46:16 [DEBUG] train episode 249: reward = -138.00, steps = 139\n",
      "05:46:31 [DEBUG] train episode 250: reward = -171.00, steps = 172\n",
      "05:46:46 [DEBUG] train episode 251: reward = -162.00, steps = 163\n",
      "05:47:03 [DEBUG] train episode 252: reward = -188.00, steps = 189\n",
      "05:47:14 [DEBUG] train episode 253: reward = -124.00, steps = 125\n",
      "05:47:33 [DEBUG] train episode 254: reward = -210.00, steps = 211\n",
      "05:48:17 [DEBUG] train episode 255: reward = -500.00, steps = 500\n",
      "05:48:32 [DEBUG] train episode 256: reward = -166.00, steps = 167\n",
      "05:48:42 [DEBUG] train episode 257: reward = -111.00, steps = 112\n",
      "05:48:55 [DEBUG] train episode 258: reward = -145.00, steps = 146\n",
      "05:49:07 [DEBUG] train episode 259: reward = -141.00, steps = 142\n",
      "05:49:19 [DEBUG] train episode 260: reward = -133.00, steps = 134\n",
      "05:49:36 [DEBUG] train episode 261: reward = -187.00, steps = 188\n",
      "05:49:48 [DEBUG] train episode 262: reward = -140.00, steps = 141\n",
      "05:49:59 [DEBUG] train episode 263: reward = -118.00, steps = 119\n",
      "05:50:12 [DEBUG] train episode 264: reward = -141.00, steps = 142\n",
      "05:50:23 [DEBUG] train episode 265: reward = -119.00, steps = 120\n",
      "05:50:39 [DEBUG] train episode 266: reward = -184.00, steps = 185\n",
      "05:50:50 [DEBUG] train episode 267: reward = -122.00, steps = 123\n",
      "05:51:18 [DEBUG] train episode 268: reward = -324.00, steps = 325\n",
      "05:51:31 [DEBUG] train episode 269: reward = -146.00, steps = 147\n",
      "05:51:43 [DEBUG] train episode 270: reward = -133.00, steps = 134\n",
      "05:51:55 [DEBUG] train episode 271: reward = -139.00, steps = 140\n",
      "05:52:09 [DEBUG] train episode 272: reward = -155.00, steps = 156\n",
      "05:52:21 [DEBUG] train episode 273: reward = -130.00, steps = 131\n",
      "05:52:33 [DEBUG] train episode 274: reward = -136.00, steps = 137\n",
      "05:52:47 [DEBUG] train episode 275: reward = -158.00, steps = 159\n",
      "05:52:57 [DEBUG] train episode 276: reward = -107.00, steps = 108\n",
      "05:53:08 [DEBUG] train episode 277: reward = -134.00, steps = 135\n",
      "05:53:21 [DEBUG] train episode 278: reward = -139.00, steps = 140\n",
      "05:53:37 [DEBUG] train episode 279: reward = -187.00, steps = 188\n",
      "05:53:50 [DEBUG] train episode 280: reward = -143.00, steps = 144\n",
      "05:54:02 [DEBUG] train episode 281: reward = -141.00, steps = 142\n",
      "05:54:13 [DEBUG] train episode 282: reward = -120.00, steps = 121\n",
      "05:54:25 [DEBUG] train episode 283: reward = -132.00, steps = 133\n",
      "05:54:38 [DEBUG] train episode 284: reward = -135.00, steps = 136\n",
      "05:54:52 [DEBUG] train episode 285: reward = -160.00, steps = 161\n",
      "05:55:05 [DEBUG] train episode 286: reward = -150.00, steps = 151\n",
      "05:55:18 [DEBUG] train episode 287: reward = -148.00, steps = 149\n",
      "05:55:32 [DEBUG] train episode 288: reward = -151.00, steps = 152\n",
      "05:55:48 [DEBUG] train episode 289: reward = -180.00, steps = 181\n",
      "05:56:01 [DEBUG] train episode 290: reward = -139.00, steps = 140\n",
      "05:56:15 [DEBUG] train episode 291: reward = -147.00, steps = 148\n",
      "05:56:24 [DEBUG] train episode 292: reward = -111.00, steps = 112\n",
      "05:56:34 [DEBUG] train episode 293: reward = -109.00, steps = 110\n",
      "05:56:48 [DEBUG] train episode 294: reward = -151.00, steps = 152\n",
      "05:57:01 [DEBUG] train episode 295: reward = -154.00, steps = 155\n",
      "05:57:22 [DEBUG] train episode 296: reward = -228.00, steps = 229\n",
      "05:57:37 [DEBUG] train episode 297: reward = -165.00, steps = 166\n",
      "05:57:53 [DEBUG] train episode 298: reward = -179.00, steps = 180\n",
      "05:58:08 [DEBUG] train episode 299: reward = -174.00, steps = 175\n",
      "05:58:17 [DEBUG] train episode 300: reward = -96.00, steps = 97\n",
      "05:58:36 [DEBUG] train episode 301: reward = -211.00, steps = 212\n",
      "05:58:49 [DEBUG] train episode 302: reward = -154.00, steps = 155\n",
      "05:59:02 [DEBUG] train episode 303: reward = -145.00, steps = 146\n",
      "05:59:14 [DEBUG] train episode 304: reward = -129.00, steps = 130\n",
      "05:59:26 [DEBUG] train episode 305: reward = -129.00, steps = 130\n",
      "05:59:41 [DEBUG] train episode 306: reward = -168.00, steps = 169\n",
      "05:59:52 [DEBUG] train episode 307: reward = -123.00, steps = 124\n",
      "06:00:05 [DEBUG] train episode 308: reward = -135.00, steps = 136\n",
      "06:00:43 [DEBUG] train episode 309: reward = -429.00, steps = 430\n",
      "06:00:52 [DEBUG] train episode 310: reward = -98.00, steps = 99\n",
      "06:01:05 [DEBUG] train episode 311: reward = -136.00, steps = 137\n",
      "06:01:17 [DEBUG] train episode 312: reward = -131.00, steps = 132\n",
      "06:01:32 [DEBUG] train episode 313: reward = -170.00, steps = 171\n",
      "06:01:46 [DEBUG] train episode 314: reward = -151.00, steps = 152\n",
      "06:02:00 [DEBUG] train episode 315: reward = -167.00, steps = 168\n",
      "06:02:15 [DEBUG] train episode 316: reward = -165.00, steps = 166\n",
      "06:02:29 [DEBUG] train episode 317: reward = -150.00, steps = 151\n",
      "06:02:40 [DEBUG] train episode 318: reward = -124.00, steps = 125\n",
      "06:02:56 [DEBUG] train episode 319: reward = -175.00, steps = 176\n",
      "06:03:07 [DEBUG] train episode 320: reward = -128.00, steps = 129\n",
      "06:03:21 [DEBUG] train episode 321: reward = -160.00, steps = 161\n",
      "06:03:31 [DEBUG] train episode 322: reward = -111.00, steps = 112\n",
      "06:03:41 [DEBUG] train episode 323: reward = -109.00, steps = 110\n",
      "06:03:56 [DEBUG] train episode 324: reward = -165.00, steps = 166\n",
      "06:04:10 [DEBUG] train episode 325: reward = -150.00, steps = 151\n",
      "06:04:20 [DEBUG] train episode 326: reward = -105.00, steps = 106\n",
      "06:04:32 [DEBUG] train episode 327: reward = -136.00, steps = 137\n",
      "06:04:42 [DEBUG] train episode 328: reward = -118.00, steps = 119\n",
      "06:04:57 [DEBUG] train episode 329: reward = -172.00, steps = 173\n",
      "06:05:09 [DEBUG] train episode 330: reward = -127.00, steps = 128\n",
      "06:05:27 [DEBUG] train episode 331: reward = -210.00, steps = 211\n",
      "06:05:38 [DEBUG] train episode 332: reward = -116.00, steps = 117\n",
      "06:05:48 [DEBUG] train episode 333: reward = -105.00, steps = 106\n",
      "06:06:04 [DEBUG] train episode 334: reward = -175.00, steps = 176\n",
      "06:06:15 [DEBUG] train episode 335: reward = -124.00, steps = 125\n",
      "06:06:30 [DEBUG] train episode 336: reward = -161.00, steps = 162\n",
      "06:06:44 [DEBUG] train episode 337: reward = -166.00, steps = 167\n",
      "06:06:53 [DEBUG] train episode 338: reward = -90.00, steps = 91\n",
      "06:07:02 [DEBUG] train episode 339: reward = -112.00, steps = 113\n",
      "06:07:19 [DEBUG] train episode 340: reward = -180.00, steps = 181\n",
      "06:07:33 [DEBUG] train episode 341: reward = -146.00, steps = 147\n",
      "06:07:42 [DEBUG] train episode 342: reward = -109.00, steps = 110\n",
      "06:07:52 [DEBUG] train episode 343: reward = -101.00, steps = 102\n",
      "06:08:02 [DEBUG] train episode 344: reward = -118.00, steps = 119\n",
      "06:08:12 [DEBUG] train episode 345: reward = -104.00, steps = 105\n",
      "06:08:25 [DEBUG] train episode 346: reward = -145.00, steps = 146\n",
      "06:08:34 [DEBUG] train episode 347: reward = -101.00, steps = 102\n",
      "06:08:46 [DEBUG] train episode 348: reward = -126.00, steps = 127\n",
      "06:08:57 [DEBUG] train episode 349: reward = -125.00, steps = 126\n",
      "06:09:06 [DEBUG] train episode 350: reward = -86.00, steps = 87\n",
      "06:09:06 [INFO] ==== test ====\n",
      "06:09:17 [DEBUG] test episode 0: reward = -132.00, steps = 133\n",
      "06:09:28 [DEBUG] test episode 1: reward = -118.00, steps = 119\n",
      "06:09:39 [DEBUG] test episode 2: reward = -139.00, steps = 140\n",
      "06:09:47 [DEBUG] test episode 3: reward = -87.00, steps = 88\n",
      "06:09:57 [DEBUG] test episode 4: reward = -118.00, steps = 119\n",
      "06:10:07 [DEBUG] test episode 5: reward = -117.00, steps = 118\n",
      "06:10:17 [DEBUG] test episode 6: reward = -111.00, steps = 112\n",
      "06:10:27 [DEBUG] test episode 7: reward = -116.00, steps = 117\n",
      "06:10:36 [DEBUG] test episode 8: reward = -106.00, steps = 107\n",
      "06:10:45 [DEBUG] test episode 9: reward = -106.00, steps = 107\n",
      "06:10:55 [DEBUG] test episode 10: reward = -114.00, steps = 115\n",
      "06:11:05 [DEBUG] test episode 11: reward = -122.00, steps = 123\n",
      "06:11:17 [DEBUG] test episode 12: reward = -137.00, steps = 138\n",
      "06:11:26 [DEBUG] test episode 13: reward = -105.00, steps = 106\n",
      "06:11:37 [DEBUG] test episode 14: reward = -133.00, steps = 134\n",
      "06:11:48 [DEBUG] test episode 15: reward = -125.00, steps = 126\n",
      "06:12:02 [DEBUG] test episode 16: reward = -158.00, steps = 159\n",
      "06:12:12 [DEBUG] test episode 17: reward = -115.00, steps = 116\n",
      "06:12:21 [DEBUG] test episode 18: reward = -118.00, steps = 119\n",
      "06:12:32 [DEBUG] test episode 19: reward = -127.00, steps = 128\n",
      "06:12:46 [DEBUG] test episode 20: reward = -165.00, steps = 166\n",
      "06:12:56 [DEBUG] test episode 21: reward = -118.00, steps = 119\n",
      "06:13:09 [DEBUG] test episode 22: reward = -142.00, steps = 143\n",
      "06:13:17 [DEBUG] test episode 23: reward = -103.00, steps = 104\n",
      "06:13:29 [DEBUG] test episode 24: reward = -134.00, steps = 135\n",
      "06:13:50 [DEBUG] test episode 25: reward = -248.00, steps = 249\n",
      "06:13:59 [DEBUG] test episode 26: reward = -111.00, steps = 112\n",
      "06:14:11 [DEBUG] test episode 27: reward = -133.00, steps = 134\n",
      "06:14:25 [DEBUG] test episode 28: reward = -173.00, steps = 174\n",
      "06:14:37 [DEBUG] test episode 29: reward = -134.00, steps = 135\n",
      "06:14:48 [DEBUG] test episode 30: reward = -132.00, steps = 133\n",
      "06:14:58 [DEBUG] test episode 31: reward = -117.00, steps = 118\n",
      "06:15:08 [DEBUG] test episode 32: reward = -115.00, steps = 116\n",
      "06:15:19 [DEBUG] test episode 33: reward = -131.00, steps = 132\n",
      "06:15:28 [DEBUG] test episode 34: reward = -98.00, steps = 99\n",
      "06:15:40 [DEBUG] test episode 35: reward = -141.00, steps = 142\n",
      "06:15:50 [DEBUG] test episode 36: reward = -119.00, steps = 120\n",
      "06:15:59 [DEBUG] test episode 37: reward = -107.00, steps = 108\n",
      "06:16:12 [DEBUG] test episode 38: reward = -149.00, steps = 150\n",
      "06:16:23 [DEBUG] test episode 39: reward = -120.00, steps = 121\n",
      "06:16:45 [DEBUG] test episode 40: reward = -263.00, steps = 264\n",
      "06:16:59 [DEBUG] test episode 41: reward = -171.00, steps = 172\n",
      "06:17:10 [DEBUG] test episode 42: reward = -122.00, steps = 123\n",
      "06:17:19 [DEBUG] test episode 43: reward = -111.00, steps = 112\n",
      "06:17:30 [DEBUG] test episode 44: reward = -120.00, steps = 121\n",
      "06:17:41 [DEBUG] test episode 45: reward = -126.00, steps = 127\n",
      "06:17:58 [DEBUG] test episode 46: reward = -202.00, steps = 203\n",
      "06:18:08 [DEBUG] test episode 47: reward = -124.00, steps = 125\n",
      "06:18:16 [DEBUG] test episode 48: reward = -87.00, steps = 88\n",
      "06:18:24 [DEBUG] test episode 49: reward = -91.00, steps = 92\n",
      "06:18:36 [DEBUG] test episode 50: reward = -146.00, steps = 147\n",
      "06:18:50 [DEBUG] test episode 51: reward = -169.00, steps = 170\n",
      "06:19:01 [DEBUG] test episode 52: reward = -129.00, steps = 130\n",
      "06:19:13 [DEBUG] test episode 53: reward = -139.00, steps = 140\n",
      "06:19:23 [DEBUG] test episode 54: reward = -115.00, steps = 116\n",
      "06:19:37 [DEBUG] test episode 55: reward = -158.00, steps = 159\n",
      "06:19:46 [DEBUG] test episode 56: reward = -109.00, steps = 110\n",
      "06:19:55 [DEBUG] test episode 57: reward = -107.00, steps = 108\n",
      "06:20:07 [DEBUG] test episode 58: reward = -139.00, steps = 140\n",
      "06:20:17 [DEBUG] test episode 59: reward = -118.00, steps = 119\n",
      "06:20:30 [DEBUG] test episode 60: reward = -150.00, steps = 151\n",
      "06:20:46 [DEBUG] test episode 61: reward = -185.00, steps = 186\n",
      "06:21:00 [DEBUG] test episode 62: reward = -165.00, steps = 166\n",
      "06:21:09 [DEBUG] test episode 63: reward = -101.00, steps = 102\n",
      "06:21:20 [DEBUG] test episode 64: reward = -123.00, steps = 124\n",
      "06:21:30 [DEBUG] test episode 65: reward = -126.00, steps = 127\n",
      "06:21:39 [DEBUG] test episode 66: reward = -102.00, steps = 103\n",
      "06:21:48 [DEBUG] test episode 67: reward = -109.00, steps = 110\n",
      "06:21:58 [DEBUG] test episode 68: reward = -107.00, steps = 108\n",
      "06:22:07 [DEBUG] test episode 69: reward = -109.00, steps = 110\n",
      "06:22:15 [DEBUG] test episode 70: reward = -86.00, steps = 87\n",
      "06:22:28 [DEBUG] test episode 71: reward = -154.00, steps = 155\n",
      "06:22:37 [DEBUG] test episode 72: reward = -106.00, steps = 107\n",
      "06:22:47 [DEBUG] test episode 73: reward = -114.00, steps = 115\n",
      "06:22:59 [DEBUG] test episode 74: reward = -138.00, steps = 139\n",
      "06:23:11 [DEBUG] test episode 75: reward = -145.00, steps = 146\n",
      "06:23:23 [DEBUG] test episode 76: reward = -141.00, steps = 142\n",
      "06:23:34 [DEBUG] test episode 77: reward = -122.00, steps = 123\n",
      "06:23:43 [DEBUG] test episode 78: reward = -113.00, steps = 114\n",
      "06:24:00 [DEBUG] test episode 79: reward = -196.00, steps = 197\n",
      "06:24:09 [DEBUG] test episode 80: reward = -100.00, steps = 101\n",
      "06:24:21 [DEBUG] test episode 81: reward = -145.00, steps = 146\n",
      "06:24:36 [DEBUG] test episode 82: reward = -175.00, steps = 176\n",
      "06:24:44 [DEBUG] test episode 83: reward = -99.00, steps = 100\n",
      "06:24:58 [DEBUG] test episode 84: reward = -160.00, steps = 161\n",
      "06:25:09 [DEBUG] test episode 85: reward = -124.00, steps = 125\n",
      "06:25:20 [DEBUG] test episode 86: reward = -132.00, steps = 133\n",
      "06:25:32 [DEBUG] test episode 87: reward = -147.00, steps = 148\n",
      "06:25:44 [DEBUG] test episode 88: reward = -132.00, steps = 133\n",
      "06:25:52 [DEBUG] test episode 89: reward = -93.00, steps = 94\n",
      "06:26:03 [DEBUG] test episode 90: reward = -137.00, steps = 138\n",
      "06:26:17 [DEBUG] test episode 91: reward = -160.00, steps = 161\n",
      "06:26:26 [DEBUG] test episode 92: reward = -105.00, steps = 106\n",
      "06:26:37 [DEBUG] test episode 93: reward = -137.00, steps = 138\n",
      "06:26:48 [DEBUG] test episode 94: reward = -136.00, steps = 137\n",
      "06:26:58 [DEBUG] test episode 95: reward = -114.00, steps = 115\n",
      "06:27:11 [DEBUG] test episode 96: reward = -148.00, steps = 149\n",
      "06:27:26 [DEBUG] test episode 97: reward = -178.00, steps = 179\n",
      "06:27:35 [DEBUG] test episode 98: reward = -112.00, steps = 113\n",
      "06:27:47 [DEBUG] test episode 99: reward = -135.00, steps = 136\n",
      "06:27:47 [INFO] average episode reward = -131.01 ± 29.60\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO19ebgcV33l+VX19lbtlmTtkhcsb2ALY5sdbGy2mMVJPGEGCAEnxCRkmcnYgUlIiAOBgRBCWJzggAlgmASwARuwDdgYhGV5lRdJlrVZiyU9PS1v7e6quvNH1a26detWdXW/3ur1Pd/3vu6urq6+Xf363FPnt1xijEFDQ0NDo7dgdHoAGhoaGhrthyZ/DQ0NjR6EJn8NDQ2NHoQmfw0NDY0ehCZ/DQ0NjR5ErtMDSIuFCxey1atXd3oYGhoaGpnCQw89NMIYWyRvzwz5r169Gps3b+70MDQ0NDQyBSLao9qubR8NDQ2NHoQmfw0NDY0ehCZ/DQ0NjR6EJn8NDQ2NHoQmfw0NDY0ehCZ/DQ0NjR6EJn8NDQ2NHoQmfw0NDY0uxaZdo/jM3dtRtuymH1uTv4aGhkaX4oGdR/GZu58BgZp+bE3+GhoaGm3G9x87gCcPnKi530TFRsE0UMg1n6oz095BQ0NDY7bgj775CABg98ffmLjfZMVCf9FsyRi08tfQ0NDoUkyUbQwUWqPRZ0T+RPSbRPQkETlEtEF67gYi2kFE24joCmH7hUS0xXvus0TUfDNLQ0NDIwb/fM8z2Lx7tNPDSIXJioX+Qncq/ycAvA3AfeJGIloP4BoAZwO4EsDniYh/gi8AuBbA6d7flTMcg4aGhkZqfOqu7bj6ixtjnz8xWcV133gYxycrbRyVGhMVG/3FLlT+jLGnGWPbFE9dBeBWxliZMbYLwA4AFxHRUgDDjLGNjDEG4BYAb5nJGDQ0NDSaia9u3I0fPn4QX75/V0uObzsssu3EVBXbD435j5/YfwLTVRtTFQsDXar847AMwHPC433etmXefXm7EkR0LRFtJqLNR44caclANTR6ESPjZWzZVzvbZLZBRbwyuM0yUW5+bj0AVG3Hvz9ZsQAAb/38L/G6f7wPjDEcHpvGm/75fvzld7dgomyjv1OePxHdTURPKP6uSnqZYhtL2K4EY+wmxtgGxtiGRYsiC9FoaGg0iOu+/jDe/Ln7MVG2Oj2UtkIk3jj0eeQ/Va2P/CuWg7ufOhTZ7jgMP3rieTjexFMRxnB03LWWdh6ZAAC8+98fxMfv2AoA+PWzRzFZsTDQqWwfxthljLFzFH+3JbxsH4AVwuPlAA5425crtmtoaLQRR8bKAIBfPDPS4ZEk45G9x7Dj8FjtHVMiFfnnPfKv1Dcxfvqu7XjvLZux8dmjoe03/3IX/uA/HsIPthx0x2AFYxidqODEVNV/fO/2I/jOI/sBuJPERKWDyr9B3A7gGiIqEtEauIHdTYyxgwDGiOhiL8vnnQCSJhENDY0W4NzlcwAAdz8dVardhLd+/le47NP31d5RguMw3LppLw6emAptr9rpbZ/JSm3lzxjDdx/Zh10jE3hudBIAcGS8HNpn065wZpE4htGJCh7Ze0x57HLVwWS5Sz1/InorEe0DcAmAHxLRjwGAMfYkgG8DeArAjwBcxxjjZ/L9AP4NbhD4WQB3zmQMGhoa6XB0vIwNf3cXtuw74XvfT+xvne8/Nl3Fzffvgpvb0V58/Edbcf13tuBrG8PL16ZR/sVcetvn5l/uxp9+6zH8093bkTNdV9uS3mOvNynwKwpxDEcnKthxeFx57Kmqjclq92b7fJcxtpwxVmSMLWaMXSE8dyNjbB1j7EzG2J3C9s2ebbSOMfYB1on/DA2NWYwdh8dxxofvxK6RidD2gyemMTJewc6RcZQ962HaI7jpqp2KpDftGsXtj6Vzav/+jq342x88hZ9vb3+yxrc3P6fcXrFqkz/z
wpBplP+3HtwLwI0TmAYn//B53H10IvTeouc/OlHGeEzcxXIYGEN3Kn8NDY3uw56jE6hYDnaNhBUl7wxZthyfiKarDiYrFl78d3fjx0/WtoB+60sb8cdea4JaODntetnj0+0NKjPG/PeUCTyN8udzYBry5/tULIa84dJpWXgPx2GYrjqh9xbHMF11MFG20Jc3sXCwqHyPrlT+GhoazceJqWqsGkwDTkhjEumWPRKqWI4/EUxbNk5OWRgrW9h/POyPyxDHlEZB57kSdsL7Hjwx5We+tAIV24HlHX8qQv6135cPLU3AlxN72bJ92+fIWBmjE24Wz+GxwP/3lb9w7iqWg/GyjYFiDkvmqMlfK38NjR7B+X/zE1zysXsafv1UHPlbIvkHto9KkarwoBC4PHRyGgDw7JHxWLsoZ7r0UrWC54+MlfGKT/wMX75/F274zpaIPy5CzMmfFvz38bKF509Mx75OzM+frDai/L2JI4XnH9hmjj9pfPaeZ3DBR++KHKOsOM8V28F42cJg0cTahYPK98hato+GhsYMIBO3iF0jExibrsY+zwuH5KsHrvardtj24R50LTX/yHPH/fsHjk9h18gELvv0vfjZtsPK/fOeEq4Kyn//8SlUbYYb73ga39y0F7969ij2HZtUvl4kfDEd8r/d9GtcnDA5irULsnqvpCB/pw7bh4+xbNkoKyYLcXKr+pNvMKlVLNf2GSjm8NGrzsG7L10dOUbH8vw1NDTahyQlzPHq//tzvPPmTbHPc7UrThDPHBrDQU8ti8ofCMiylio+KRDwwRPT2HF4HIwBu0YC8t49MuFfFeQ95S9aL6MT4TTId968CS/7h58p308k/+OTwXtv8TKUxAlBhDjpRTz/hAnuY3e6ExKvO5UtI8D18HnqZlWwl8qWo5wsRJvp6EQZTx44oVT+A8Uc5vTnsWJ+f+QYWvlraPQARsaTm4lxQnxk7/H4fRS2z+X/eB/+5vtPAXAJR1wWkO9XSxVPVWwMlVwiOnBiCvs9xX74ZGDBfPDWR/D3dzwNAH72i0jGR2t8vtDnEIhaRfQ7j6hTJCeSyD/B8//SvTtxw3e2+AFfSxGXuPmXu/BbX9qI+7YfCU1O5aodsYkYY6F4x5fv34Wrv7AxNPFy5T/oBXVV/n6rlL9ezEVDo4vw/Ml4LxsIK+A4cMLjGS+yJ18Rsn2AgPxFb1553KqNhYNFGEQ4eHwapbyrHQ8JYz4xVQ2O500mYrbP0Yk6yD/G9uHYeWQCL1o5L7KdTzanDBUVAV93TEZCI/mkWPSTB04CcK8+xMm1bDkR8r/6ixv9cwQEwWHxiky0fYCgtQQf46vOPAVnnDIUP6AZQJO/hkabMF62cMeWg3jbi5b5wVAZz59Izrg55rUZ5n66Ctz2OemRk0xKrvJ3MFTKYWza8smolu0zWXZ7yxdzJRw4PoWiT/7hjBZ+HE52E4LvPpqC/EcnKrh/xwjWLhzwt4ntlfMmoWoz7ByJU/7u5100VPTTTf3x+eTvnr87txzEH37jYbzstIXB6xOyfPiE9Mkfh5sZly0nMtE8tEdduXvMm8BNg/xsn0FP3fOFW06dU8Kvbnht7DiaAW37aGi0Cd9/7AD+4j8fxyd/ouqC7oJnsXB7RQYnf14tqgInofGySzInp6Sgp+WgXHUwpy/v7ec+f8/Th7D6+h/iQEzK52TFRn/BxMr5/dg7Oon9x9z9Do0Fyr8skD+3N8amLf/qI8n24ft87I6n8cfffAQPCNlFXPlPVizfuuHN0GRw22dRgvLnS0ht2j0KxoDNuwOiFo970Y13h14/rQjq9hdMt/1yykZwfCIbKJhu/x7B9uGtJfpalN4pQpO/hkabYHqM86V7d8a2Fj7oWShDUmEPYwxV2/Ftn1LexMETU7j5/l2RIDHP9uG2RET9Wm6Gz3ApH9rvgDfxPBizytVkxUJfIYfVCwewZ3QSz3nkf0RS/rzClRPleNnC6//pF3j7F34VCfhylC0ba264
A1+691mfmH/y5PP+8zzYLE4eP992RFmbwCezRYPF2CIvvoDgwePuZxaJ+1khlnB4rByyyPjVjIi5fXml8o8Dn8AHizl/0pBtH03+GhoZxHOjk/iXn+2IFDKJAdU4i+WwR6RV6bUf/t4TOP1Dd/rEAQCv/OTP8bc/eAr3PRNunzDlERQnwZOSXz5VtWE7zFf+ctpoXB/7yYqNAU/5VywHoxMV1zoqW77aLttR5X9yqoqtz4/hoT3HYm0f3hTtY3duhelVyorKn6eZjnhN0z561dlwGMOX7n02ciw+loVDRUxJbSt4XIN7/nLjNwCRthgiqYuBco45/QVlwDcO3PYZLOWC+zzgy68A8q135DX5a2g0Gf/wo6345I+3YePOcGtfUUHGKX9ObvLk8PUH3B4y25532xuLivSup8J59lOS8peDpXxSCMg/bAtNViyMTlTwqZ9sw6+eHRG22+grmFi9IPDirzh7iT8expjn+YeV/2PCojFx2Uy7hXTRPUfD5Hv5+sX45Y4RHD457Sv/81fMxWmnDGLfsTB5T1dtHDw5jULOwHApD8bCal32/A8oisVka0eMAcQp/2lP+b9gSe3g7AmP8AeKORzzJkNf+ee18tfQyCxOndsHAPi5VPwkErodUxXL++zL+eiLh93Sf7lX/OLhIu55+lBoMgnaO1TBGIvYPpzsffKXisEmyjY+/L0t+Oef7sCXf7EL+45NwnGYu7BIIYdVC4Jc9MvXLwbg5u9zYuXpjVz5i2MbnahgyXAp8rl3C4T/K+kz/vaGFXAY8NOth3HUs40WDBYxtz8fWWf3f3z5AXzjgb0YKJhCa+bg81UF8q9YDkbGy1jmfV8c/Irtf11xZuT10wrlP7c/D9thmKraeN3ZS/Afv/eSyD4iRNtndDJM/vIk0Epo8tfQqBPHJyv44K2PREiVo+T9cO/bHl4oRVT+cb1teCqknI++Yp5LuM9I7X9/+8UrcXisjLuEFaS4TVG1GcqWEwn4cptnTr9a+U9ULD/o+fTBk3jZP/wMX7zvWT/ge6pAlrwZ2di05X8+Pna54nXBQAFTVRvnLBuOfO49R9VVvgCwdpF7pXF0ooLvP3YQBdPAgoEC5vYXIqmvD3qB26mqrezLLwZ8D52cBmPAulPCbRX4xDygWM6xrFL+3nkEXNIu5qO02i8oeT7mgULOP2c824fv16+Vv4ZG9+GL9+7EbY8eiPSK5+A/aDkYKXr+qgIix2G+J16xHTDGcPDEFN5186bYRm9vfdEyrJjfh5uFxcZFshubtiKeP8+7H/YyimTP/9hExZ+EuC2y8dmjKFuO37r4f11xJr76nov8rKTxskj+YeXPwe2ni9cuwO0feGnoud1HJ3Dusjk+6Z0yFDQ5m9dfAADcseUg7t8xgv/z5vUo5U3M68+HYiBAoJinq45fGRsmf+75k//9nLZIIn9v2FyFi7aPasIf7hPJ30ApFyXuMPlXYBBCNQA8xbOYM0AElDT5a2h0H3h2TS6mUoiToNzNUiR/lfI/MVWF7TBfTVsOw+d+ugP3bj+Crc+rlzIcKuXwstMWYacQpJysWD7ZVGwnavvU8PxHJyq+F83BXSpOUte9+jS88oxFfqByfNryyd4nf0H5v+WFp/oTXjFv4nSpcGnv6CRWLujH0jmuJcTVPuD634Wc4beneM0LTgEAzO0r4MRUNXQu1wi1ASrbh383RMEiK2csDpO/431YPrFNesrfsh1lz6W5fQXhPXMhUhe3c0xUbORNA4WcQP7eeSQirJzf71/ptRKa/DU06gQnsbhCrYrtkoXE/eGAr8Lz5342J8Cq7firSgHAS9bMj7xmoJBD3qTQRCOqXsdhCtvHU/4x2T67j05Erky4by8HIgcVyt9P9bQcvOMlK/H4R16HlULPmr68GSI+PqZ5/XnMH3CJdJ2gxos5A6Wc4fv7/Z66n9ufh8OAe7Yexlv+5ZfY9vyYP87zV8wNFmJX2D62w/DE/hMYKJhYf2rYhuLk
Lyv/kzHN9kTbp1QwQ98Zh2zjFCTyF+s67vzgy/G+l69Rvlczoclfoyfws62Hse4v70jshpkWnEDiqmxjlb9A/vJqT0CQCbOEk7/FQipy7aIBbPu7K/GHr1oHwFWvpbyBnGH4x7NsN4efe8iMqa0KICB/ebGVZz2/X6yw5ZOVTGL8SmBs2gq6gwrKvy9vYriUR1EIYPblXevIFK6cHMZgEKHPO55I/kSEvoLpt13gpM7toPfdshmPPnccf/ndLRgvW3jz+afiu++/NNHztx2Gx/edwNnL5vhXL/5n9d6Ib9+0axRbnz8ZsZg4eL0E/2xq5S+Rf85AwQy2DQhj6C/kYoVFM6HJX6Mn8Jm7t8N2mE9sMwEn2pwRo/w9kndY2N4Rs30chfLnaZ5c+Vdsxw8eAy7RFnNBFkt/3gQRee0O3GPz1g6cTBxFtg9HX95EwTQwEVOcJFov3NKRO0yaBmGgYGK8bPnBUFH58+BnUVC5fQX3fkEgONvxyN/bf8FgYKXwsQLuhMePJSruV5+5CA/tOYb9x6cwXMrBMAhLhkswDcJPhawrMQ31qYMncd6yOSHi5WMBAvL/+gN78ZHbn4xdQ4B/HsCtKi4qMnXk95BtH3kCagc0+Wv0BDgHJ/TzSg3enz4Xp/xjUjrj8vwf33ccq6//Ie7d5hZrLRFsH5UvzG0FvrxfzqTIylVckTuM+YVjMgo5Q6lSOdYK6vuoNzGpslAGSzmMT1u+3VW1HVi2A9thfvBTnMT4ffGzOQ6DaZA/ucjBYv6aPm/CA4C5/cEEwesNxDGeMlzCuy5ZjW9u2usXkfHvxmHu93GOEGQG3OIv/t2IyyfuOzblt7OYIwR4AYQCvMvm9oUmOnlMHPkc+Z/fNEj5mlZDk79GT4Avys2Le2YCy04+VhzJV2KU/0+3usr0/z20D0TAKUMB+YutG3hrX07YnFByhgHbYWCM+QVK/d6+lsOw5+gkTjslukpUMWeESFnGOkH586sSVXvhoVIeY+WqT9iWw/x2zErl772nuM1hLvG+9+VrMFzK4ZVnLAq9Bx+nSKKi8l+1QAwQB6T9ijMWgrFgOUW5fmLhYDF0NdNfyPnfTd4kP6h/6OQ0njs2CaJwUFn8jO7xCjHkr1D+nngYKAQTWjuhyV+jJ8Dt92b8xjihxy28IqpWMXAqruAkbhcJeLAYZItU7fCiK77y94nQfeyvmGUHi4VzknxudBIV28HZp0Zz61Xkv35psJ+o/Plx+xRtBwaLbndQ1QIx/Pji+3DPXlT+tsNgGISzT52Dxz9yBRZLhWCqytd5gvIX1bjYE59bS3FLVRbzhq+8Czn3fnCVGFxRVW2Gx/adwOKhUqTpnhjgJSIQhZU8SWmdfFz883fC8gF0S2eNHkGa5cLvefoQTkxV8bYLlifu5xNJTKGWSIK2zWA7DJ/40VbsOByka4pXBCWBKIZLeT+WULFYmPyFXHD3saf8PYKznGCRFk7SO7yisLNPHcZtjx4IjbOYiwYnRa9dzLXnUNk+Q6UcRicqOCR44jyjiI+1JAV8gTD5Vx0nciW18YbX+JMOH6dY+coJ/6LV8zHcJ6p3gfxzMvmHvzNu2QwUXcVPCL4bOZN38+5RnLV0ODJhqqyzUt70v7u8YYTiGwBX/ob/3p2AJn+NngBv7pWk/H/vq5sBoCb5WzWUfyirx3GwadcovnTfztA+YiJQUVL+hRxX8k4oV56TRElSwdya4BW9QECAvCL4nFPnRMZZzBkRO6KYM/H5d1yAx/edCOWvc6haTQ8Wc/jFMyO4/jtb/G3jkvJX2T4iITIWdD3lWDonqCQOul0G728ahJ/86SuwfF5f6EpK/Ex8KUn+ncirlRUFC812WOi7k62YyYqNZXP7fCFx41vPwavOPMUP6ovZX6W8Ad4zLmcG/j5vh31yuoqCMPF0Atr20egJcIu9GZ4/V5GqdE0g6vk/czhaoCWmgYrEOFjK+YQVtX3CFsqAb/t4yt92fM+fk+WOw+OY
05cPtWTgKBXMiOVQzBt4w7lLcf3rX4ChUi5kp5gGhawWf8wK8hpPUP68elX2xo2E5bV8z19S3WcsHkJ/IYfBglr583Pz062H8fd3PB21ffyrqJwXTA6eU41m2bw+/0pt+bx+LJvb579GPMeiFZQzyB/HNRetAOC2s+i07aPJX6Mn0IqAb1Wu4vIgt3F4+mBA/lyliwFfMetzsBiQf0Umf8n24UFdnnVkOcxPt+RkeXyyggUDBZ8QxQyToWIuYuOIhGwYhLv+9BW4+kL3Smj+QEFJ0KoOlFz5F3zyD47LrRa5GCppaUX5akeGOK4+he3z9Qf24qb7dobW9xXH0F800ScFXlX/K5evXxzKPAKAU+f04b0vW4N/f/eLhfEGn1dM6zx3mXsFtn7pcMfJX9s+Gj0BP4jXhIAvV+1plf8T+4OWxn15E2NlC6IAFf3/sPJnof7xsu3jK38jeqXASd1y3EIxTojDpRxGxitYNFgEEUUsB5mQTxkuYa6n/hcMRFU/AGX+Ox83J1B+XNMg3x6Rq3xl20dEPa2Oxc8ke+27RsIN5Phkt3rBAKartt9VFUBI+n/4jWdh3aJBXLByHu7ccjA0JsMgfPhN66XjCspfsH1KeRNP/s0VMIhw7/bDkfG2E1r5a/QEfM+/CccK2hjEe/5BBo6D7YcC5c8tD5HwxSuIoWIuyFDxllvkGCxKyr8QVv62E6R6cmKybAaD3HTMv3rTerzdi2ec4rWITlL+HHwfufCKQxX35oFVrqTFgC3fJpN/ku3TJ6ntJIj75HPhY/KUVQ7u+X/i6vPwT9e8KKT2DYKfcvrel6/Fq72eQsFVSDx9iso/JwR8SzkTA8Wc368ICDp6thta+Wv0BLi1kibrx/YKjlTY8Hd3+W0Y4rJ9KraDvryJqm25la/ClQAnJpH8xSuIoVLOJyxRyZ86p+TntQc577zIS7xS4CmZgfLnhPael63Btx50F4VZ5DWPU3n+Mnix00BBTRc3vvUcDPfl8J2H9wfngDdQ8x6rUj5lVZ5kyXHFn6bVsaik8zXaJHCFzvcTv3Yiwld+98WQi7FVn0XGC5YM++2l84LyFycM3t5BK38NjRaC/35VbRVkqJbqA9wqVHElqiTlz4lZXrDcJ39hHGKmymAx7xPR+7/+MO7fMYJL1y3Ar254rUA6PEUwnO0TSvX0rzDCKZRckS8a4so/TDxJ7YjjiHfxcAl/9JrTpXMg2z4K8pNtnwQ2Cl5fm/xVqZ4ixG3yJE+S8ieiyBXJYI3JEAA++pZz8KnfPB+AOzn3SRO2OA6d6qmh0UJw2ycF92OqYkdIEYh24pRzxjkqloP+IffHfkSyGbjt44SUv2DtlHIRRSxbMfMHCjh32Rw/eOiTv1DkFbJ9hJdzT5uTv1yxq1L+nPr6EshurtTygAe9OZfKQVLV50qj/FPZPooiLxFXnX8q/t9D+5SvDWf7qMfzlhcuw+LhIubFxEA48t7nyxmE15+7FH3SQjidDvhq5a/RE6hH+U9bakUvr7tbtR1s3j2KHzweFE8xxlzbxyMgrvz5wim8cZmo9sX7Q0K2D4cchC3mTHz/j16Gl6xdAABSaqjX3qEQtX0AYJ5nHfFK3loBXwD+wuRJlsuwTP5WmPx95Z6PV+WJ5K9o7xAHMR1UZfu8QmodISJE/jHDmdOfx5XnLK05jrw3KedNA4PFHN503qmh53WRl4ZGG+DUqfyTjsFh2QxXf3EjAPg/bK54uSXAA4ynzu3DyefHfAU8WbG8RVdyIc/fzfYJs45KjYsIpXpaDgqm4dsZlmT7/I9LVmPtokG8/PSFANIFfHlL5CTVLdsngefvbicir5FcPPnHxVkA9ZWDjH/5nQvwzU17Q+2Qeeto22FYs3AAX/zvF0YWiBchnquZZobxiSeuAeCqBf24ZO0CXLBy7szeqEFo8tfoCfgB3xTkP11Vk39E+Svy/DnpceUvkv/W58d88vrgrY8CAHZ//I2hgq/BYs63CzhqdXzM
Came01UbxZzh+9RVL9uHwzQopHxl31r1Xu94ySps2jWKd126OnEcIip+tk+wrZQzlDn4HEl5/kGqZzxlvfG8pXjjeVFFnjdd8l+3aABnLhnC/uPx6wWHyH+GuWH8e8zHtP4eKObwzWsvntF7zATa9tHoCQTZPilsnxjyl7leledfkfLsue3Ds2tUAcuQ7aP0/JOtDn6lYHnZPsW8ESLSJDulP+L5R99r0VAR33jfxX6cIA34eRDfu5Q3Q8r9srMW+3YYkLLCt4G1bbkCDwrc4mlPHEHSZJTqfb0DJF3RdBKa/DV6AtyyicnODGEqTvlLlw2TCnuI2z6i8u8vmP5jFZGLAd+hksrzr2X7CI3dqu7Sj6GslQTyiaR6NqmvPO/tL847S+aUQt06X7x6Pv76zWf7j5OKvBYPF0EUrHVQD/hn4uc+bu1lebwzbbPsB3xjbJ9OQ9s+Gj2BwPZJo/wDMh4vW9h1ZALnLp8TsX32H5/y7zsOw7u/8iBet34xgEChjoyXMa+/oEw5BFyFzLOGSnkDiwZLEaVY0/MXLJ5py/aUfzhlMQ5iVpNBbquCRvHld23Ag7uP4Yv3Piso/+D5r73nJYrCLuF+AtmuXTSIX9/w2kir5zTwlb8ZtLaIA4Vsn5khJwR8uxHdOSoNjSbDD/im2FdU/r//tc148+fuR9my/WPc8PoX4Nxlc7DvWOAdj5Ut3Lf9CD78vScABKQ6Ml7BnL68oDbDIxidqMB2GOb257HpQ5dhTn84awZAaK1XFYLGbsxX/mltHzHV85G/eh3OXR7t/pkWrz1rMX77xW7jsqDFRfDec/rzEdvLSHmFAqAh4geC88MnUf5dqE5LuMirobeLvG/SlUYnoclfoyfAKTdJ+XPvXPT8eZUmY0HAd25/HjmT/J71quOKWS3zBvK+NSPHDUbGy7AcB3nTCC0ELsKOaSDHEWT7uKmeJUn5JyldMc1QXp6wEYgLywC1CTTtFcpMwK82ZOWvImUxyDtT24e/76xU/kT0SSLaSkSPE9F3iWiu8NwNRLSDiLYR0RXC9guJaIv33GepE+uXafQc0mT7cHI4Ol7B6ut/iNsfO+AXY4nkbxBFMjhkS0gMTA4V8/6SfXLcYGS8DMtmfnBQhbgYBLjUzQcAACAASURBVEfQ2I0rf0NKWUwI+KYomqoHnOjKioCvCmknqWaMqeAXXcXbP3w4zRgKn1y61fOf6ZR0F4BzGGPnAdgO4AYAIKL1AK4BcDaAKwF8noj4f9kXAFwL4HTv78oZjkFDIwVqB3w5OewecfPAP3PXdiFQzPz7pkGRH7SVQP7FvBEof4n8j45XYDkMpnS8r77nIrzbS60UYxAq+Mrf5srfDHnpSdzDx6VatasRiO2ogdq+eVp7aibgE6/YWRRQp2DyMTRDkwa2T3cq/xkFfBljPxEe/hrA1d79qwDcyhgrA9hFRDsAXEREuwEMM8Y2AgAR3QLgLQDunMk4NDRqwUkR8OXkz9V52XL81zmM+ereJf/wD7oiVQX3SQ3Mlnll/cvn9Yf2OzpRRtV2IkT0yjMW4aAXUI5LPeXg5F/1irxk5V+LVL/x3peE1uudCfhY5N4+cRB9/paRf05S/t4YVTEGPoRmjISTv1y01y1oZrbPewB8y7u/DO5kwLHP21b17svblSCia+FeJWDlypVNHKpGr4GlSPXkfu9kxfXyxW6cDgtUu2v7hH/QcjM4MYWymDfwpvOWoi9v4sWr5+Oz9zzjP7dp1zGYhtoaWLnAnShWLxxI/Gx84rD8Ii9TClwmk8+lpy1MfL4ecOtMbu8Qh7Dt07RhhBDN84/3/Pl4mjERcdLvVtunJvkT0d0Aliie+hBj7DZvnw8BsAB8nb9MsT9L2K4EY+wmADcBwIYNG9IkamhoKOEr/4R8H674eSBXJHTGmL8Ai8r2ka2ZM5cM+fcLppt3f9n6xZGVpO5++hAA4Cyv146IS9ctxDffdzFevHpe0kcTbB9X+ZfyRojw2xlv
FNNO06Adto+f7SNV3CZ5/s2Q/vlcxm0fxthlSc8T0bsAvAnAa1lwTb0PwApht+UADnjblyu2a2i0FGl6+/DgLl+CUF6RSwz4yraPeJUwtz8fUv5xLYRfdeYi/HzbEQDx1sAl6xbED9iD39jNcTzbx6zL9mkmTINApK7wVaETtg9/G2W2j6/8Z/6+fJLpVttnptk+VwL43wB+gzEmNsy4HcA1RFQkojVwA7ubGGMHAYwR0cVels87Adw2kzFoaKRCimwfPkHwxcfFtXi/8cBe3LJxNwCXGCK2j+DLn7tsTojk48j/rKXDfnuDmWS6hFs6e7192qCoVSAvE0pu6RyHdmT7FCTlz79neQJ3x+PezrSvDyDaPhlV/jXwOQBFAHd5M+avGWN/wBh7koi+DeApuHbQdYwx/ut4P4CvAOiDG+jVwV6NlkNu6fx/vvcEXrxmPn7j/KDNri0pf3Gi+NRd2/37qoAvV/4LBgr45NXnhy71xZYJYguDvEHoK5g4OW3FNv9KA9O3Whyht0/64qlmI2dSeuXfxKKqOOT9bB9O/u52VStl3/VpwlhMg7BwsIDFTcqkajZmmu1zWsJzNwK4UbF9M4BzZvK+Ghr1gkkVvl/79R587dd7QuTPyb5WXr0hLELOweMD//D287BkTsmfQIDwgiIiEefMoMXxTIKCRO54nj54ErbDsGr+gGSnNHzohmAQ+ZNsVyh/yfZZvaAff/za0/GbFy6P7NvMgC8R4Z4/e1WkeV63QPf20egJpEn15AHfuH7+HCaRr+yHSjmMTQfr9PJ8/VyM7SMiZ5K/bOJMiS9nGLh3uxs/uPS0BR2zfQCX8HndQ+08f4H8Wx7wNb3xEf7s8jOU+zYz1ROAsl1Ht6A7zSgNjSaDZ/mk8fzLMSt5cYjZPrwlQtnL9uGkH+f5iyiYhr8e70xbAORMQtVmWLWgH8vn9Xcs4Au4xMmD57XSTOtJSW0UcqpnEvgYeqHvgCZ/jZ5AUqrn6EQFtz26P9J3Jw4GkT+J+OTv2T5cvYoqVrWOLOBOFLx//kyVPye4C1e5aaEUUv4zOnTdMAzyr6Jq2j5G622fotTbJ3E8XPn3APtr20ejN8ArdZ1oH5733bIZD+05lvpQpkF+NktA/o7/HBAmtXjbJ/D8Z5oOyN93idf5stPKX0yLTUI7i7xqtcYGxGUnWzOWboJW/hqzCnIRFYfY0rlqhyX+ziPjdb2HaQQ2D+/EyVswqAK3ceSfNwmlJhUC8c+90FsxrJPZPm7A171f2/MP7rfc9kmj/L1d2j1hdgKa/DVmDX7xzBGc/dc/xrc3Pxd5jmv9smXj5HQ19Fyt7B4ZBgXKn6cLBso/+pOKVf5GsKbtTHu+81XFFg5x8hfHO6ND1w0i8tclrivbp1Xkn/NSPVN0MPWVf0tG0l3Qto/GrMH+Y24jtL/4z8fxthctC+Xic+X/gW88EnqN7bCaXTNlmAb5RV18MRSf/BUEVozz/IVsn2b1f+FrBVMnbR8K1i2o9d6m0fpxFupQ/n62j1b+GhrZgejkyxk7cVk+ExW1TZQEUfnzFbv4ZKAKWsZ5zXkh26dZVaCLhgqRcbR7AXGDousWxCEUmG4RG730tIW4+sLlmD9QSDGe3vH8tfLXmDUQF0KXff04iKtxpYWr/D3bx7Nt+NWD0vOPWYYxZ1BQ5NUkgg48/2Bbu4mMEBR51Yo3tKPI66ylw/i/v3l+qn2D9g6zH1r5a8waVIROknJ//TiMN0j+vGPjUIl7/vHKPz7gazQt1ZODZx91MtvHoOBKq54ir24IsjazwrfboZW/xqyBqPYrKZX/eLlaeycJBhE+efV5+PoDe7Fh9XwAyZ5/YoVvnq/t25yO5Srbot22j+iX1+JQ0e3qBsJtZm+fbodW/hqZxV/d9gTed8tm/3HVEm2fdGT69i9sDD1Ow5OmQVg8XMKfXX5GZM3aepR/zjD8Fb/kZSBnCqMOAm42wgVmyW9ObbB96gFp5a+h0f24ZeOe
0OOQ8k9p+8go5Iya2T+mgrAS8/xjgrl5M/D8rZSTVRy+/fuX+OmVQKdtH2HiqWPfLuD+nlD8HJr8NWYNRM8/bcBXRjFn1iR/MSuFB2obUv6mEVqIZSa4aM388BhF26cDqZ7B/Rqpnl3n+bu3XTCUlkPbPhqzBlYDnr+MUooWACLBc6Uv9/YRUYy1fcifPOSWEzMFdVBR12M5hVM9O8+4vRTw1eSvMWtQr+1z3avXRXK/edvfJIgEz9syBF09FRW+sbaP4U8kM7V9VOhUk7KQ8q+xb6ixWxcQLmnlr6GRPdRr+7zmBYuxdE4ptI2r9CQRaiiU/zRX/grPP07R5sxgURhrhraPCnxiaXu2j3C/ZoVvqAdRiwZUB3op4NsFp1tDozmoV/kXTMPP0+fg1bh9eTNW/YWVv2f7SP380yBvGH4voFYo/2YuRl4P6rF9OrnojAok3c5maPL3YNkOdhyur7ujRmuw4/BYQx54tc4K35xJGCyGV1oq+r12jNgFVuSlGIEg4FsPgeXMYFGYZqd6umNxbztq+9ST6tkF5O9/f50fSsuhyd/DZ+5+Bpd9+l7sGpno9FB6GjsOj+OyT9+Hz9y9vfbOEiyb+cRTSaGk86aBhYNhz5/783mTkI+RzKGAr5/tY4cep0HOJN9maoU1w4ms/b196inyEm2fzjMuH0I3XIW0Gpr8PTy27zgAYNeIVv+dxOGxaQDAg7tH635txXYw4DVaS2P75E3C9a9/AS5Zu8Dfxm2fnGHEkpHS9rEcENVHYAXTwEvWLMDvv2ItPva2c1O/Li2MDtk+oQrfGvt2svW0Cn6FdIfH0Q5o8vcwr99VgM+fKHd4JL0NTliNuCBV20G/12gtne1jYG5/AX971dn+NlGJx6l4MTDJlStj9Tdny3nZPje84SwsHi7VfkGdoA6p2HoCvt1X4eveauXfQ+B92feMatunk+A/OpayJTAQ5MhXbQeDxfqUPxAmHe75502KJSMzZGsEk0S9hNGsTp5x6FTOujg5Zi/g6yn/zg+l5dAVvh7Gy65nu2dkssMj6W1wMqhH+VdtB6Zhomox9BfTK/+8EfXbxf76cWQkbzcNguWwCJnf/oGXYjxmWUkAsQHlZiHwr1v6NhGQoP27YTGXetBLC7hr5e/h5JTb3XH30dYq/6mKjZvv39W0Lo6zDeTbPunPD8+UqTqC5y+Qf9y5ziUo/1yi7RPezklcvlI4b/lcXLpuYey4W21zqBaTbwfqebt29POvB0YPef5a+Xvg67ru85YCbBU+8eOt+Pdf7sbSOSW8/tylLX2vLKIR5c/bOlRtB3O9fvYVywFjDGtuuAPvfdka5es4aYtVudzzz5mkLNhSERTf1qzVuJqFThUs1dPSOdwBtEUDqgO6wrcHccJT/jxlr1U4POYGlFuR1z0b0Ijnz9s3Vy2GYs6EaRCqtuOf43+7f5fydSrFzpuw5QxDmXeu2sZjB91gW4joVNpiPS2d+fkk6g6rRVf49iBOTrnebKs5mVeCxjX76nWQr/zrsX0C5Z/PGSiYBqo2S6yaJRIUe4zto1L5qhYEquN0AzqV6tlIS+duKPACtPLvSXDbpx7SaQT8yiKuza+Gi3pa3XCSr9iOW5xlEiqWk9gvRwy2ip64n+cfk+2jIqmcInDcDfDJv4t7+3RbaqUO+PYYpqs2KpaDgmmAsfosh3rB2wD0wj9XI+BXXvVMwlXB8y+YBgo5AxXbSVT+YvVuWPnzCt+g744IFZHywLFqIRcVXrBkKNV+M0WniLW+ls4Eg7qjqRsgpHp2eBztgA74IlD9c/rzODJWBmOtu+zj5G812G9+toOTfj3zL/f2LZshb7q2T8VyEhdIyQtXXqpsH9MgqOK3SQHftNbFd//wpZiqtja2BHSywle8X/vNDaIuVP6dHUc70CXzbWfB/X6eKdJK66fs/ejTrjHba+BXXY0o/4rtuG2Sc4Yb8E04x2KGj0r55wy18lcGfOu0ffoKZmQdgVaAD6f9
C7iHb2vBIOoiz18HfHsKXPnP7efk37r34pWnjS4zONvBT0tdAV+e7ePZPnnTJf+kzqB5waIJF3kFFb6qAK7K9ulU3/xa4GNtt8VYb658vT2RWokeauqpyR8AJrwqTN4aoKXKn9s+LVi8YzYgre0jxmWCbB/J9kmYYMWAr0iOBaG3T72pnmk9/3ah07ZPWvVsGtQVOf6AXsax58AFIieEVib8lK2AqDSi8Mm/5n7B/arN4DgMtuOSfz5noGKzxFqKOKIWA75JaZ0i+r2q4m6xLjh826dTyr8e26dL2J8id2YvNPkjIBxOCK1V/q7n34qVm2YDWMpsH/HKybKZH9zN5whF00C1lvKPSS8Jd/VUZftEXzPcl/Nf000ISLgzFb6UkkGJukdp+1ZZh8fRDvQ0+ZctG5bt+L1feICvleRf0bZPIvi5r7WSl/h81XH8K6mCaSCfI1Rqef65GOUveP5Kf19BUkMlN1akmiw6iU4t48jfLi2fu7ZPd9Btt9UdtBLd9d/aZpz54R/hmpt+7VsIPMDXyoCvtn2Swc99rflXJHbLZqhawRq6POCbdI7jiLog9Ps5b9mcyPOqCWHYI//uU/7ubftX8nJvs2n71GdZZRkzIn8i+igRPU5EjxLRT4joVOG5G4hoBxFtI6IrhO0XEtEW77nPUoernTbvORbYPv7CHK0h5lCQUmf7KOGkTPUMk39g8fD2DhXLSTzHhZgmbKZBfi//P738DHz5XRvCzyuVf3eWy3QqeFlvuqRB3UO2ehnH9PgkY+w8xtgLAfwAwF8BABGtB3ANgLMBXAng80Rkeq/5AoBrAZzu/V05wzHMGCzi+bfmfcrCAiOzqbHbziPjDS24rkLaPP+w7cP8Fs5BwNdpKOBrGsCCgSIWDBRgGoQLV82Tnlcof68+pB2FW/WgUwVL/vum3r+LlL8u8koHxthJ4eEAgiSNqwDcyhgrM8Z2AdgB4CIiWgpgmDG2kbm/8lsAvGUmY2gGOEeYvu3TGmIWF/ZIs9JUFnBkrIzL//E+3PP0oaYcz/Hz/JP3k5W/JXj+Bc/2SSL/U4aKyu1EhO9d91K89+Vr/cciVIqQK/+pSpeRf4fqD6jOQHM3FXn1guLnmPH1KhHdCOCdAE4AeLW3eRmAXwu77fO2Vb378va4Y18L9yoBK1eunOlQYxHYPq0N+E4I5D9bAr7jZQu2w3Dca4k9UwR5/jWUP5M8f0H5DxRNHJ+s+nEAGW954am48a3qBdNNIiyZE6ynK/OmUvl7nv9EJX7Vrk6gY7YPv03t+XeP0tYVvgKI6G4iekLxdxUAMMY+xBhbAeDrAD7AX6Y4FEvYrgRj7CbG2AbG2IZFixbV/jQNQlb+rUr2EW2B2ZLq6Xv0TbJ9gsZuyfuJ588K2T6Ec5fNwdi0hW2HxpSvHe7LY6Co1j0yucskoA74usea7Dbl3zHbp94K3y6yffhtdwynpaip/Bljl6U81jcA/BDAX8NV9CuE55YDOOBtX67Y3lFw4sq12PYJ+dSzhfwd7tE353hpPX9HqvDl5zNvGrhw1XwAwAO7RpWvTVJ18lOR9XoVL+We/0TCer2dAFexbS/yMvhtPRW+3cG2usI3JYjodOHhbwDY6t2/HcA1RFQkojVwA7ubGGMHAYwR0cVels87Adw2kzE0A5xIAs+/Re8juBCzxfbh58pu0oTpK/8aX4IlTaQ8sydnEtYuHMDc/jw27Tpa9/vLClTmAJVC5Z5/ucviOH7mStv7+den/I0uKvLqpd4+M/X8P05EZwJwAOwB8AcAwBh7koi+DeApABaA6xhj/Jr4/QC+AqAPwJ3eX0fh5/l76X+tWlxdVKuzRvk33fZJ19snmuoZxG0Mg3DWkmFs3Kkm/ySiidg8KQK+3PPvNnS6t09dAd8usX16qaXzjMifMfb2hOduBHCjYvtmAOfM5H2bjWief2vfB5g9ef6chJuV6tlQnr/D/Csp3mStv2AqXwckk2HE5jGSHwM6z19GvZXF
Rhc1dqs3UynL6OkKXw4WsX3aofxnB/mn7cWTFgH5J+8Xjp8EqZ786o0vx6hCkg0iP5Um2ycXUzDWaXRuJS9/BKn31y2d24/ulCxtgGhTRNs7tIr8g/vVWVLklVappz6e1M//vu1H8NyxSbzjJatC+8ntHSwpaM9X5BLRXzAxWbETL+mjnn9t2wcA3nTeUrzqzFPiD9wB8M/SqWUcUyv/Lszz75YYRCvRs+QvBijbFfCVferZAH4em/Vx5JbO77x5EwDgtzesCCnsUMDXcUIBXyDozslhUJD1kpztE33OoOB/Io7QPvc7F8Qes1MIFnBv7/vWn+ffPdk+vZTq2Z3Xq22ASMRyqmerevuEPf/ZofwbWXYx+Xjh43JsfT6csy+fy6r/Hbr/0nxFLo7ff+U6/5edpEhVto5ITN0SmEyDTvWpqbels2F0zwLu9a5FkGV0ySlvPxymsH14tk+LeJnNStvHu216wNd9fK7XWfOhPcdC+4WKvOxA+ecVyv+nf/5K/MUVZ6a6pFdxu7h/tyjUNOhUtSqlmGRFdFO2T72ZSllGz5J/SPlL2T6tLvIimj22Dyf9puf5e8dbPOy2WpDJPxQ8d1g04CuQ/5qFAyCiVD9sFVGKm7qFpNIgaOncmfetJ9WzWybV4Kpl9qNnPX+xxqpdjd34cYs5Y9bYPpz0W5Xnz+2fkfFyaD9Lip9EAr6C7SMr4JnYPt2SlZIGnVrJq17r5I9ec5q/dnKn0UstnXuW/EWlKrd0blWePz9uwTT8ZQezDv6ZmqX8Za+fH1fugmpLyzjyPP8g2ydKJn4wL0HXqX70It93S1ZKGnQuzz98WwuvPWtx6wZTJ3TAtwegsn1avYwjf89i3pw1ef5p8/LTHy/8mJ+zii2Tf3C/6rCgwtfP84+meqYpPqrl+XeLQk0DPux2T1j1Bny7Cb20hm/PKv9wYzDZ82/te84q28dpje3DwR8mK//kgC+HH4hMYH+VRcI3/dnlZ+BN5y1N/gBdBG5htVvF8rfLkEPmIxh7BgdfJ7IjY5oMUflzYmm95+/eFnLGrFH+vu3T5JbOHPy4ctM0fvoMcvfhE7iZxvap83fNJ4tL1i3A2kWD9b24gwjy/Dvl+WePQP0xZ2/odUOTPwLyz/uef2sDvgXTmDXLOPLP1CrP34nx/LnHX8gZcFiQ7ZM3eLZP1PZp1AM3U9hF3YhO2T5Zbo6mK3xnOX717Ajuefqw/zhQ/q3N8/dtn7yJsenu6v3eKFpp+zgO8x/Lyp9vz5uGp/ydUI8YVW+fevPPg9dlU8l2rqtndn3zHhL+vUn+v/OvD4Qec2LxPf+WtXR2b4vm7LF90q68Ve/xAPdqwvE9//AqWeKavQ5zW2SL7R9Utk+jqi6r6X/15ts3C1kulMryVUu96FnbRwRX/vzH3brFXLjynz22D2uy7SMqf9thsZ6/qPxd28dBXpC4cnsHEfWSEv+/yFKaJyCMu0OLuWTNJgPEsWdw8HWiJ5W/jIrtWQbe990Ozz9ucfGsoelFXlKfft/ztx0wxnzi5pNnIWf4AV+R5JTKny8vqPhd/9f7L8XDUhWx/7qMqsF6++o3C/75yqB5Um+NQpahyR+u8jeIfL+4dZ6/e1vMz54iL38ZxxZk+9h2QP6MuYTPg/J2hPwd5EO2jyLPP2F5wQtXzcOFq+Ypx9SpHjkzRadsH6NDKabNQFbjO41A2z4AypYdWk2oZame3PbJmbMmz581vchLVP5OqJhLzPjh5J83DXdisJlfoQ3Eef7ebZ1SmF8xZKmvDxCMt/22j3ebQQINrlpmPzT5w/WTDQr+WVvd24enerbKXmonmr6Yi6j8pXNUVpB/wSTYzK3wzQl9gdXZPo2puk5lzcwUOtunfuiWzj0G3/Yhnuffmvfh/jhvETAbgr5cmTfL9mEs7Pmr6jHE9xNTPcPKX2X7uKiXDLNatNSpZRyDSuq2vm1T0Klz1glk8OtpPtyAbxtsH+75c/KfBdZPs4u8Itk+
TE3+YsCXF3nlQtk+SXn+9f2wG60P6DQ6VbCU7YBvdq9a6oUmfwDlqgOi1qd6clXLLQm5WVkWwfyAbPNtH9caC8i3bNmYKLvFcY6g/B0WDfgWFE3sG81+6VTK5ExhdGjSyqpNBmQ7XlEvNPkjUP7UYuXPrYqSZ0nILQuyiGbbPmHl78B2GPq8nP0nDpzA2X/9Y9z26H5f+bu2TzTgG7cWb9xzSUiz9m83onN5/vxOts4XoD3/nkPZsr08f+75t9b26St45D8LlL9v+zTpozBJ+TssIP+H9xwHANy77QgcxtxF2Q33KqDqhAO+KjRawJPV3O9OpS1m2TrJajV3I9DkD1eBmwa1zfbh5F+u2km7ZwLNXsA9VORlMzgO86t1ueVjGgTLI3vTcLN9LNsJef4qNOrdZ7XZl1i42E5kNUYC6N4+PYeK5SBnGi0P+HJrhCtZuWVBFuE3dmtRqqfNGIaK7r/pRMUl/5yX4WMYLiH7AV8z+pM9c/GQf7/RYq2s5vlfcfaSUBykXchqdhQgXi11eCBtgCZ/uORfyBlCnn9r3se3fWYR+Te/wldu7xCcr8mKe6WUMwi2oPwdL9VzIB/+d378I68LBX4bbdOQVR/4/BVzcf6KuW1/304FmpuBIB04g4OvE5r8EU31bHVvn9Issn2aXeQlnvtvbtoLRwj4+uRvuuTP4zQ2Y54NFP7BDpfyoceNdpvManuHTiHTyzj2kO+jPX+47YDFIq9Wt3eYXcqfB3ybZ/vMHyjgd1+6Gv/50D4cnaj4qbGc/Hlhl2vVERwn2tJZhUZTEHspCNgM+Kcpg6craP6XwcHXCU3+HkJ5/i3i5Nls+zSzt49BwPqlw/62QPmHA74GkZvtkzbg693W38/fS5nsAUJoBmZDS+cMDr1uaPL3YBqtz/N35GwfaxbZPk30/IkoFFzl52ui7Cl/z+fPGe5+vKVzLeWPGSp/0r+WVMh2hW/4djZD/zt7EFs6t6q3D1e1vL1DuZp95e8v4N60VE94+fsC+UeUv+H37yee7eOEF3NRodEiL+3514dg4fgOD6QB9NJ3ncGvpzUgISe6lcrfIPKbjmXV9qlYjk/ErVjD1yQKFWyVlAFftzbDJE/5x6R6imjc9gnfaiSDMqz8dUvnHkQ44Nua97Adt5c8V/7yurRZwT/dsx2/9aWNAFrR2A0R20dekpEIsBl824ev4WvWkJqNB3x7Rw02A1nOlc9yjUK90OTvwe3n795vlfJn3PbxsleyqvyfP1HGoZNlAELAt2ntHdziLZH8TSPcqM1xGGzH8RbgCfL887WUf4NZO3wsmvzTwU/2yeD5Csbe0WG0BZr8PYT7+bfO9jGJfCLLKvnzhmtAYPc0r8LXtcbEzB2TyF8DwX1/eEVebm2G7bd0rtHbp0FF2qm1cLOKLFsnWa5RqBea/D1QHbbPtufHcGKqWvd72I47yeRMtzI1q9k+luOmVgKtyfM3JNvHMCjUn59PPnw/O63y57cNe/6znxCagWyv4eve9sJEr8nfg2mkD/j+9k0bcfP9u2Kf/+WOEUwrqnfdNEb3fjFnZDbbxxZW2HKane3jnaMQ+RNhsBgUo9uMeUVeboaWag1fFWbs+fcCIzQBWW6RkNVWHo1Ak78Ht59/OuU/Pm1h3OswKeOnWw/hHf/2AL7yq92R5xhjPqkVc0ZmbR/Ly6sHgolSxf0npqq46nP349kj46mPzRTK3zQIw31BqwbbQVDkJbR3qBXwbdTz71R3zKwiyy2dG20BkkU0hfyJ6H8SESOihcK2G4hoBxFtI6IrhO0XEtEW77nPUofPMrcKKGVvH+YRjRXTwP77jx0EoK4GtT0/G3DXmM2q7RNS/k687fPc6CQe23cCTx88mfrYvBZC9PyJgKGSoPwdBw7zPH/P9gFQM8+/0QIe0RLUqI0sE2ijzf+yiBmTPxGtAHA5gL3CtvUArgFwNoArAXyeiHi+3hcAXAvgdO/vypmOYSbgwVdxMZck24cTTdzi
6z/fdhgAMFCM9szjaYyAm/GTdeXPGEvs6mknTAxx4AFfUwr4ik3a+MpdhkEhRZ6+t08jyr8H2KBJaoB6AQAAGD1JREFUyLJ1ogO+9eEfAfwFAPEXfhWAWxljZcbYLgA7AFxEREsBDDPGNjJXXt8C4C1NGEPD4FkkafP8rQRCsx2GY5NV736U2F3bx72fbc+fB3uTu3ryc1XPQvWqPH/ToJDyd7jn7xV5cdQK+HI00tsni9WqnUKms328216w+Wb0L01EvwFgP2PsMempZQCeEx7v87Yt8+7L2+OOfy0RbSaizUeOHJnJUGMRkH+6PP8k5V8VrKCqgvB4hgqQbduHk7nl2S9ADPl758OqowiA10KYIdsnrPwtx4HtxU8MaZJIwkwCvlr5p0ejy2V2A7J81VIvavbzJ6K7ASxRPPUhAH8J4HWqlym2sYTtSjDGbgJwEwBs2LChJcn3nPxFXzcpccUnPoXnL14NqK4MeBojkO2Ar2jnJK3hW8siU4GfIzFn3yREAr6219tHJPxatk+jXrTY8VWjNrLcHK3RpIAsoib5M8YuU20nonMBrAHwmPdjWg7gYSK6CK6iXyHsvhzAAW/7csX2joEvc2cKAd+kPjVcxaoITbQ3VM87XvUq4E46lYySvyWQOif95tk+wcLsHIZs+ziB7SOq+NqN3RpX/j3ABU1DlpujZXHMjaJh24cxtoUxdgpjbDVjbDVcYr+AMfY8gNsBXENERSJaAzewu4kxdhDAGBFd7GX5vBPAbTP/GI2DZ5Xw9WCBZM8/KYgp2huqKwMnZPvMAuVvMz8zKul81KP8bYe3dBaXX5QCvp7nL9sxNZW/cLx6INtQGsnwT1UGT1mWM5XqRUuWcWSMPUlE3wbwFAALwHWMMW5wvx/AVwD0AbjT++sYOMmI6i7J87cSrAxxm1r5Y3Z4/sI5SPb8+cRQj+ePSKqnQSSlegZFXiIp1+7t05ify3sIaaSD75t3eByNIMsL0dSLppG/p/7FxzcCuFGx32YA5zTrfeuFbOlwkiGvyIsoOc8/yfMXA77KbCDP0gCynerJydx2glRPlVXGz4Eq+B0Hh0W9fNOQPf9A+ctZQUlovMiLeoIMmoUs++b8gjN7I68fPZfAJrchCDo2wrulGqme8VaGSPjVmFTPkO2T0VTPQPk7QZGXYsKsNpjnL6d6RpQ/Y7C9Ii/x8rxWY7dGC3jcTLBeoIPmIMstnX3l3wOzfe+Rf4zyF4OBaVI9VYQmKlxboXYdJ2z7TFaslnUQbSVU2T4qZ8dO4fmPTlRw7/YgjddhUY890tvHZn6RVz15/o2mIK5aMIA1Cwbqek0vo9HYSjcgyzUK9aLnyF8mdrlXO9VU/vGev13D87cZ8xXFGYsHcXLawt7Ryfo+QBcgyPMXKnxVyj/BIuP41oPP4T1fedDPfOJXRznJzlE2djMolBWUNtWzXlK67tWn4dt/cEldr+llZNrzz3Keap3oOfKPKH9Ttn2a4/mrCpuY4Plfss5tg/SrZ4+mH3yXQFy03U4I+KZp7zBVsWA7zD93PCguXnYTAQsGi/iX37kAS4ZL/vuahhHO9mlRqqdGfZgN2T698D/Sc+Qv85CY7cNvk7N9gmCnjHqKvNYtGsDi4SJ+uWOkvg/QBRCvfvhEyVh00ky6SuLgcQE+qfKWzrLyB4A3nrcUi4eLXn2B2yojVORV6xfbQ2l8nUSmA766t8/sRVy2D48V1gr4JlWthvP8Y9o7CNlF55w6BzuPTNQ1/m5AyPN3ots5/PYOCbZP1bN7KpLylxu7cRgGCb19JOWfurFb4m4aM0SmWzp7t73wP9Jz5B+X7SNmKKTJ868V8I2t8BX+qYp5I2QVZQVBzx4WOp/yuU3T3qEq9f/h1piYuSMqdZMolOop2kNpV/LKoiLNErK84H0v9fbpOfKPVf7CP2ya3j4q0q4V8GUs/IPIm9kk/0D5OyGrRz5vado7cNunagW2j5xXL+fyh4q82pDqqVEffMs/
g+fZj/dm8rqlPvQc+dfO86+l/OM9/3CRl3pyMCPkn71UT5HUxdMQa/skKX/Z9nGCgjsOeSIIF3kFz9VaxjHLPWeyBL9QKoOneemcEn5rw3JcvHZBp4fScrSkvUM3Iy7P3xSIodGWzqEiL1Wev7CGL+CSfyXTyp+Fg9wxAd+k9g5iwRgQtcYARNo2xxd5pavwzSIpZQlcNWcxsJ4zDXzi6vM7PYy2oOeUv8xDPNsn8PzT5fknef45YWlBEYyFLYyCSZmzffgylkC4tw8QtdT89g4JJ5RPftz2ka0xIBzwDSn/SJFXrcZuWvm3A4F1otHN6Dnyl9VpTmH7pMnzj1usBQBKeTO+yEu2fTLW30e2ecRTJX9kfo5U1c7+PpLtYwttrznEc2YS+QVhuUg//3TKX5N/a5HlgG8voffIX2Io0wz/oxpEylYFHIHnH92JP1fKG+qWzrLtk8ue5x9KZ5WUv3xu07R3COIHge0j2wXiZGAY5E8U9a/kFb7VaA20vZYN9Bz5y35+NM+/8WUc+VVBMadW/o5k+3DPP0v9fcKFbE7osXzeZD9fBW578UlQZfvIyr8qkr84mdbI9smyF50lZLm9Qy+h58lf7OkD8CKi+Ncnef6c5Ip5Q13h64Rtn4J31VHPYiedhrxmAZNsoNC+dvy54uAWTlVQ/rJ7E0r1NCXbh9LbPuIEr9E6BKme+kR3M3qO/NNk+6Tr7RMf8C3FKv9wJgsPUGYp6Cv697Zk+8Qp/6TPJ+/jSHERIN7zjxZ51fp31l50O5Dlls69hJ4j/0i2j6KxW7LtE65IDT/HA75qz9+Wlb+3eDzPdMkCQsrfDlf4yqfETqiJ4JBtH57nL0LO8/eVv7SSV+3GbvxWs1Iroc9zNtB7ef4eWb1o5Vy88oxFPvmIBUBpbB+HeTaOQDj8WKW8iclKdIlGVYUvgEzl+svN60LZP7LyF1o/x8Fv+yy1dxAhV/jy8yWv4Zt2JS9tRrcWWe7t00voOeXPyeuDrz0df3LZGUKeP/zbNAFfIL6XTVyqpyOlMRYyaPvI2T4sIdsnVXsH77NzNe+wNAHfoJ6C871c8KWCbuzWHug2GtlAz5E/J3auEmWroFZvn1DzNltNdrG2j5znnyPvmNkh/3qyfepp7FYVWjon5fnLqZ3+91gj2Avoxm7tgq6nyAZ6jvw5IfEALycPzlu12zvEL9hSK9UzzvbJEvnL2T7ix5TPG/9cie0dJNvHYVHPX17MXdzOz2etNE9A9/ZpF4LVsDo7Do1k9Bz58xYEhqT8OXGlbekMqGwOBwa5rYXVi7mos33KGarylT3/JNvHTmH7VCTbR+X5i4/Fzp2i8jfTKH9tR7QFusI3G+g58rcl20et/BNen9C2uWq7C4zkYrp1iou5AKLnn6FsH2nNAtthfh99WeCnWslLZftEKnwF20cK8PLHtdo5A7q3T7ug4+rZQO+RP1f+pFb+hpHc26dqJyldBzmTvMZuqjV8s2/7RLN9mE+80a6etVM95TWRawZ8xRbOwgLutRZycY8TvtVoDXppQZQso+fIn/NTYBe4p8BJrfwDopZJ21X+rhWRrsjLC/hmyPaRl6pkLAi2Roq8Eha+4fC7eoZ6+4T3CXf1DP5lxVTPVAFfHYhsC/R5zgZ6jvwD5e8+5sTCfM+/1gLuyR53zjSQN43YNXxDvX1yWc/zd+Aw5l/ByC2dk1ph+Ptw8ncSevuEgrzBfbHIK03AVyvS9kDHe7OB3iN/FrZ9OJn4tg9FWxOHXp/g+VuO4yt/dcA3nMmSRc9frnOwGUMppw5c12rvIBaJ8asftwo6vJ+c5y9uD77HFFRDLjHpnjOthdwvS6M70XPkz9Wpv3A7uGXhPl+rt0+S5295tk/OIGX7BzmTJeuev+UwOA4w3JcHAIyXLWnfZM9f/NxJvX3CqZ7Bv2zOMIKsrZp9fdzvVtNR66GzqrKBniN/OduH/4MG2T7pevsAUdK2PNsnZxh++wf5vcMrT3VnkZdl
OzhwfEr9nKj8bTfV0yf/6TD512rvECJ/wfaRFaP4UM7z5+czTcB3uJT3x6rROgQtnTX7dzN6j/ylbB9D5fknLuYSr/yrtmv75GJaNTtOeKESv7dPlwV8v/PwfrzmUz+PKHkgqvxtxjBcUiv/Wqme4lVU1QqUvyziQ1aPVOHLLwRqNXUDgHddugq3X/eymvtpzAxBJXVHh6FRAz1H/nJ7B0Pp+afr7SOTmhvwDYKQ8uQgpzH6XT27zPPfd2wS01UHo+OVyHPy5OcwYLjP7Q8YtX2CgK/KSmvI9plBnn9/IYeVC/pr7qcxM+iWztlAz5E/5xtTUv5hzz/+9VaC5+8XeXlkVZUuIWRV2yrPv2I5eM9XHsSWfScaev3xqSoA4KmDJ7DzyHjoOVvR2K0vb6KQMzAm2T7i51IveB+1fVTtHUgifPE+/x7TpHpqtAdBYzf9nXQzeo78g/YO7mPyyT+wg5JTPeM9f7HIC4guXC6r2lZ5/vuOTeKnWw/j/h0jDb3++KRL/n/+7cfwJ996NPRcWPk7/hoFQ8UcxsvV0L5JV0lA1PaZrrptsIu58L+lTPgcOWEN3zQBX432QCv/bKDnfjFywJdzCef7ulo6K/Lacwb5hWNRzx/Klaeaned/dMK1a46Olxt6PVf+ExU7EviNZPsw9xwOlnLRgG9N8g9fRewdnQQALJ/XF9pPXswl2C7m+Wum6Rb4yl8HfLsaPUX+j+w9hhu+swVAYPsEqZ6i8o8/RhKhWZLtI6d7yhW+fp5/k1fy4qTPJ4F6cWIyeN3oRCVM+DYPjAftHYgIg8VcxPYR21qrWlzLnv/ukQkAwOoFA6H94hZsyZna9ulGBD2UOjwQjUT0FPk/uHvUvx+093Ufi6metdbw5aT9u//+ILY9PxY8J9k+cpWvbPsYXk1As22fES9QOzJD5Q+4HvyoMIn4C9Z4basdr2p5sJjDmCLbJ5gIk22fiuVgz1FX+SeSv1TkxR+mCfhqtAc6zz8b6KlfzP5jgYUR5PnXq/ydkCf98N5jwnMuEXIVKipmxlgk2wdwrZ/mk3/Zu21M+XPPXz4eEJB4IWfAtgPbZ0hh+9gO88+VKuBrSbbP7qMTmNufx5z+cC5+yOqRPP96FnPRaA8M31LV30k3o7fIX/CvTUn5O77nX2sxF4Zi3vQf7z464d+3bLfPDa9CfWzfcdx037MAwi2jReRNar7nP9645287DCenw+R/ZKwsPO+OtZgzPM/fvZoZLOaief42Q8k7V6oJjn/uvryJqu1g7+gkVkmqH5D7+cdk+2jl3zXQlJ8N9NQvZp+g/DlXyEVetXr7WA5DKR+ctj0jk/59XuTFg49f+Pmz+Ps7tuLIWDlURyCikGu+8j864ZL16EQlUmVcC2PT1Uiqq0r5F/MGbMdxG7EZhKFSXlHk5fjkr071dLf1F0xULAfPHBrHakUeflKqJ1eZaSp8NdoD3dsnG5gR+RPRR4hoPxE96v29QXjuBiLaQUTbiOgKYfuFRLTFe+6z1Mb/kJDyjy3ySu7tY9ksZPuIyl8u8trqxQMe3nssaChnKGyfJgd8ud1jOQwnpqo19g5Dtnzc44nK3yN/z/O3vSC2nO3jeJlAxbw68wkIbJ/+oomdRybw/MlpbFg1L3F84pVTzgiu3lI1dtNoC4KWzp0dh0YymqH8/5Ex9kLv7w4AIKL1AK4BcDaAKwF8noi4V/IFANcCON37u7IJY6iJk9PVUDaK3H/EL/Iyai/jWBJsn72jk/5k4QY4jYj//PCeYwm2T2s8f06G/CogLXiwl1/dGBSOHfjKP2f42T7c9qnYDsqWLe3nnitVi2v+ufvzOd8CuvS0hf7z//X+S/GHr1oXeo3K/zcN8tNmNToP3dI5G8i16LhXAbiVMVYGsIuIdgC4iIh2AxhmjG0EACK6BcBbANzZonHgvV99EHuOTkYI1s8P90iDt1ogIuwemcTln75Xebw9Rydx5pIh//FkxcZln74XBhH2HZvEi1bM
DXWeHCrl8PUH9uLupw8BiKqhvEm466lDse/XCPYcncSqBf3YeWQC7/73B9EnTFa1MFlxyXvV/AHsGpnAoqEibt20Fz/behhAkPlTzBl4eO8xvxHbUMn9V3r9P/0CphA34ZPI+27ZHCne4pNxX8Ed3+LhItYuDDz/C1fNw4XSlYCq1YNpUKrePhrtgd83S38nXY1mkP8HiOidADYD+HPG2DEAywD8Wthnn7et6t2XtytBRNfCvUrAypUrGxrcyvkDPrFfuGo+rrloBR7cPeqT/iXrFuC6V6/D7750DQDgtzasSLR9Tl88iN+8cAUe2XsMrzxzEb62cY+vWk9fPIi3X7gc55w6B1dfuBz9BRMvP30RvvuI+5HXnzoHrz3rlNDx3vvytfjFM0ca+mxxOGPJEN5x0Up879H9yuZstfDy0xfirS9ahm2HxpA3jcj41iwcwPqlc/DDLQdgEOHKs5dgqJTDVS88Fppk1586B//9JSvxrc3P+dW7Mub1F/D6c5bim5v24rVnnVLTJ96wah7edsEyLBgoYK6XFXT9lS/AhtXz6/6cGq3BgoEC/vSyM/CaF5xSe2eNjoGSiA4AiOhuAEsUT30ILsGPAGAAPgpgKWPsPUT0LwA2Msb+wzvGlwHcAWAvgI8xxi7ztr8cwF8wxt5ca6AbNmxgmzdvTv3BNDQ0NDQAInqIMbZB3l5T+XOiTvEG/wrgB97DfQBWCE8vB3DA275csV1DQ0NDo42YabbPUuHhWwE84d2/HcA1RFQkojVwA7ubGGMHAYwR0cVels87Adw2kzFoaGhoaNSPmXr+nyCiF8K1fXYD+H0AYIw9SUTfBvAUAAvAdYwxbvq+H8BXAPTBDfS2LNiroaGhoaFGTc+/W6A9fw0NDY36Eef56+RoDQ0NjR6EJn8NDQ2NHoQmfw0NDY0ehCZ/DQ0NjR5EZgK+RHQEwJ4GX74QbjFaFpClsQLZGm+Wxgpka7xZGiuQrfHOdKyrGGOL5I2ZIf+ZgIg2q6Ld3YgsjRXI1nizNFYgW+PN0liBbI23VWPVto+GhoZGD0KTv4aGhkYPolfI/6ZOD6AOZGmsQLbGm6WxAtkab5bGCmRrvC0Za094/hoaGhoaYfSK8tfQ0NDQEKDJX0NDQ6MHMavJn4iu9BaQ30FE13d6PCoQ0W5vQftHiWizt20+Ed1FRM94t8mrmrdubDcT0WEiekLYFjs2IrrBO9fbiOiKLhnvR4hov3d+HyWiN3TDeIloBRH9jIieJqInieiD3vauPL8J4+2680tEJSLaRESPeWP9G297153bhLG2/rwyxmblHwATwLMA1gIoAHgMwPpOj0sxzt0AFkrbPgHgeu/+9QD+oUNjewWACwA8UWtsANZ757gIYI137s0uGO9HAPxPxb4dHS+ApQAu8O4PAdjujakrz2/CeLvu/MJdO37Qu58H8ACAi7vx3CaMteXndTYr/4sA7GCM7WSMVQDcCndh+SzgKgBf9e5/Fe4i920HY+w+AKPS5rixXQXgVsZYmTG2C8AOuN9B2xAz3jh0dLyMsYOMsYe9+2MAnoa7nnVXnt+E8cahY+NlLsa9h3nvj6ELz23CWOPQtLHOZvJfBuA54XHiYvEdBAPwEyJ6yFuwHgAWM3fVM3i33bQSdtzYuvl8f4CIHvdsIX6p3zXjJaLVAF4EV/V1/fmVxgt04fklIpOIHgVwGMBdjLGuPbcxYwVafF5nM/mTYls35rW+lDF2AYDXA7iOiF7R6QE1iG49318AsA7ACwEcBPApb3tXjJeIBgH8F4A/YYydTNpVsa0bxtuV55cxZjPGXgh3nfCLiOichN27cawtP6+zmfzjFpHvKjDGDni3hwF8F+4l3CHy1kf2bg93boQRxI2tK883Y+yQ9+NyAPwrgkvkjo+XiPJwifTrjLHveJu79vyqxtvN59cb33EAPwdwJbr43ALhsbbjvM5m8n8QwOlEtIaICgCugbuwfNeAiAaIaIjfB/A6AE/AHee7
vN3ehe5a5D5ubLcDuIaIikS0BsDpADZ1YHwh8B+7h7fCPb9Ah8dLRATgywCeZox9WniqK89v3Hi78fwS0SIimuvd7wNwGYCt6MJzGzfWtpzXdkS0O/UH4A1wsxKeBfChTo9HMb61cCP3jwF4ko8RwAIA9wB4xrud36HxfRPuJWcVruL4vaSxAfiQd663AXh9l4z3awC2AHjc++Es7YbxAngZ3Mv1xwE86v29oVvPb8J4u+78AjgPwCPemJ4A8Ffe9q47twljbfl51e0dNDQ0NHoQs9n20dDQ0NCIgSZ/DQ0NjR6EJn8NDQ2NHoQmfw0NDY0ehCZ/DQ0NjR6EJn8NDQ2NHoQmfw0NDY0exP8HFCaNyhpYgRoAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "# There is no upper bound on the number of episodes: the loop only ends\n",
    "# once the stopping criterion below is met.\n",
    "for episode in itertools.count():\n",
    "    # Train on env.unwrapped and let play_episode enforce the step limit\n",
    "    # (presumably to bypass the env's own time-limit handling -- confirm).\n",
    "    # When play_episode cuts an episode off at the limit, it does not call\n",
    "    # agent.step again for that episode.\n",
    "    episode_reward, elapsed_steps = play_episode(env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    # Stop once the mean reward over the last 10 episodes exceeds -120.\n",
    "    # A full 10-episode window is required first: episode_rewards[-10:] is\n",
    "    # shorter than 10 early on, so a single lucky episode could otherwise\n",
    "    # end training prematurely.\n",
    "    if len(episode_rewards) >= 10 and np.mean(episode_rewards[-10:]) > -120:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "# Evaluate the agent for 100 episodes on `env` itself (not env.unwrapped).\n",
    "# No mode or step limit is passed, so play_episode uses its defaults\n",
    "# (mode=None, max_episode_steps=None).\n",
    "# NOTE: this rebinds episode_rewards, discarding the training rewards list.\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "# Report the mean and standard deviation of the 100 test episode rewards.\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Close the environment now that training and evaluation are finished.\n",
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
